[PATCH] remove unused variable from walk_addr_generic()

2013-01-08 Thread Gleb Natapov
Fix compilation warning.

Signed-off-by: Gleb Natapov g...@redhat.com
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index a7b24cf..2ad76b9 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -151,7 +151,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
pt_element_t pte;
pt_element_t __user *uninitialized_var(ptep_user);
gfn_t table_gfn;
-   unsigned index, pt_access, pte_access, accessed_dirty, shift;
+   unsigned index, pt_access, pte_access, accessed_dirty;
gpa_t pte_gpa;
int offset;
const int write_fault = access & PFERR_WRITE_MASK;
--
Gleb.


Re: [Qemu-devel] [PATCH 10/12] virtio-net: multiqueue support

2013-01-08 Thread Wanlong Gao
On 12/28/2012 06:32 PM, Jason Wang wrote:
 +} else if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
 +ret = -1;
 +} else {
 +ret = tap_detach(nc->peer);
 +}
 +
 +return ret;
 +}
 +
 +static void virtio_net_set_queues(VirtIONet *n)
 +{
 +int i;
 +
 +for (i = 0; i < n->max_queues; i++) {
 +if (i < n->curr_queues) {
 +assert(!peer_attach(n, i));
 +} else {
 +assert(!peer_detach(n, i));

I got an assert here,
qemu-system-x86_64: /work/git/qemu/hw/virtio-net.c:330: virtio_net_set_queues: 
Assertion `!peer_detach(n, i)' failed.

Any thoughts?

Thanks,
Wanlong Gao

 +}
 +}
 +}
 +
 +static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int 
 ctrl);
 +



Re: KVM: x86: use dynamic percpu allocations for shared msrs area

2013-01-08 Thread Gleb Natapov
On Thu, Jan 03, 2013 at 11:41:39AM -0200, Marcelo Tosatti wrote:
 
 Andy, Mike, can you confirm whether this fixes the percpu allocation
 failures when loading kvm.ko? TIA
 
 
 
 Use dynamic percpu allocations for the shared msrs structure, 
 to avoid using the limited reserved percpu space.
 
 Signed-off-by: Marcelo Tosatti mtosa...@redhat.com
 
Reviewed-by: Gleb Natapov g...@redhat.com

 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
 index 1c9c834..5229a67 100644
 --- a/arch/x86/kvm/x86.c
 +++ b/arch/x86/kvm/x86.c
 @@ -120,7 +120,7 @@ struct kvm_shared_msrs {
  };
  
  static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
 -static DEFINE_PER_CPU(struct kvm_shared_msrs, shared_msrs);
 +static struct kvm_shared_msrs __percpu *shared_msrs;
  
  struct kvm_stats_debugfs_item debugfs_entries[] = {
   { "pf_fixed", VCPU_STAT(pf_fixed) },
 @@ -191,10 +191,10 @@ static void kvm_on_user_return(struct 
 user_return_notifier *urn)
  
  static void shared_msr_update(unsigned slot, u32 msr)
  {
 - struct kvm_shared_msrs *smsr;
   u64 value;
 + unsigned int cpu = smp_processor_id();
 + struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
  
 - smsr = &__get_cpu_var(shared_msrs);
   /* only read, and nobody should modify it at this time,
 * so don't need lock */
   if (slot >= shared_msrs_global.nr) {
 @@ -226,7 +226,8 @@ static void kvm_shared_msr_cpu_online(void)
  
  void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
  {
 - struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);
 + unsigned int cpu = smp_processor_id();
 + struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
  
   if (((value ^ smsr->values[slot].curr) & mask) == 0)
   return;
 @@ -242,7 +243,8 @@ EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
  
  static void drop_user_return_notifiers(void *ignore)
  {
 - struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);
 + unsigned int cpu = smp_processor_id();
 + struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
  
   if (smsr->registered)
   kvm_on_user_return(&smsr->urn);
 @@ -5233,9 +5235,16 @@ int kvm_arch_init(void *opaque)
   goto out;
   }
  
 + r = -ENOMEM;
 + shared_msrs = alloc_percpu(struct kvm_shared_msrs);
 + if (!shared_msrs) {
 + printk(KERN_ERR "kvm: failed to allocate percpu kvm_shared_msrs\n");
 + goto out;
 + }
 +
   r = kvm_mmu_module_init();
   if (r)
 - goto out;
 + goto out_free_percpu;
  
   kvm_set_mmio_spte_mask();
   kvm_init_msr_list();
 @@ -5258,6 +5267,8 @@ int kvm_arch_init(void *opaque)
  
   return 0;
  
 +out_free_percpu:
 + free_percpu(shared_msrs);
  out:
   return r;
  }
 @@ -5275,6 +5286,7 @@ void kvm_arch_exit(void)
  #endif
   kvm_x86_ops = NULL;
   kvm_mmu_module_exit();
 + free_percpu(shared_msrs);
  }
  
  int kvm_emulate_halt(struct kvm_vcpu *vcpu)

--
Gleb.


Re: [PATCH 11/12] virtio-net: migration support for multiqueue

2013-01-08 Thread Jason Wang
On 01/08/2013 03:10 PM, Michael S. Tsirkin wrote:
 On Fri, Dec 28, 2012 at 06:32:03PM +0800, Jason Wang wrote:
 This patch add migration support for multiqueue virtio-net. The version were
 bumped to 12.

 Signed-off-by: Jason Wang jasow...@redhat.com
 ---
  hw/virtio-net.c |   45 +++--
  1 files changed, 35 insertions(+), 10 deletions(-)

 diff --git a/hw/virtio-net.c b/hw/virtio-net.c
 index aaeef1b..ca4b804 100644
 --- a/hw/virtio-net.c
 +++ b/hw/virtio-net.c
 @@ -21,7 +21,7 @@
  #include "virtio-net.h"
  #include "vhost_net.h"
  
 -#define VIRTIO_NET_VM_VERSION    11
 +#define VIRTIO_NET_VM_VERSION    12
  Please don't, use a subsection instead.

Ok, but virtio-net is not converted to VMState, so we can just emulate
the subsection.
  #define MAC_TABLE_ENTRIES    64
  #define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
 @@ -1058,16 +1058,18 @@ static void virtio_net_set_multiqueue(VirtIONet *n, 
 int multiqueue, int ctrl)
  
  static void virtio_net_save(QEMUFile *f, void *opaque)
  {
 +int i;
  VirtIONet *n = opaque;
 -VirtIONetQueue *q = &n->vqs[0];
  
 -/* At this point, backend must be stopped, otherwise
 - * it might keep writing to memory. */
 -assert(!q->vhost_started);
 +for (i = 0; i < n->max_queues; i++) {
 +/* At this point, backend must be stopped, otherwise
 + * it might keep writing to memory. */
 +assert(!n->vqs[i].vhost_started);
 +}
  virtio_save(&n->vdev, f);
  
  qemu_put_buffer(f, n->mac, ETH_ALEN);
 -qemu_put_be32(f, q->tx_waiting);
 +qemu_put_be32(f, n->vqs[0].tx_waiting);
  qemu_put_be32(f, n->mergeable_rx_bufs);
  qemu_put_be16(f, n->status);
  qemu_put_byte(f, n->promisc);
 @@ -1083,13 +1085,17 @@ static void virtio_net_save(QEMUFile *f, void *opaque)
  qemu_put_byte(f, n->nouni);
  qemu_put_byte(f, n->nobcast);
  qemu_put_byte(f, n->has_ufo);
 +qemu_put_be16(f, n->max_queues);
 Above is specified by user so seems unnecessary in the migration stream.

It is used to prevent the following case:

Moving from a 4-queue device to a 2-queue device with 1 queue active: if we
don't do this, after migration the guest may still think it can have 4 queues.
 Below should only be put if relevant: check host feature bit
 set and/or max_queues  1.

Right.

 +qemu_put_be16(f, n->curr_queues);
 +for (i = 1; i < n->curr_queues; i++) {
 +qemu_put_be32(f, n->vqs[i].tx_waiting);
 +}
  }
  
  static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
  {
  VirtIONet *n = opaque;
 -VirtIONetQueue *q = &n->vqs[0];
 -int ret, i;
 +int ret, i, link_down;
  
  if (version_id < 2 || version_id > VIRTIO_NET_VM_VERSION)
  return -EINVAL;
 @@ -1100,7 +1106,7 @@ static int virtio_net_load(QEMUFile *f, void *opaque, 
 int version_id)
  }
  
  qemu_get_buffer(f, n->mac, ETH_ALEN);
 -q->tx_waiting = qemu_get_be32(f);
 +n->vqs[0].tx_waiting = qemu_get_be32(f);
  
  virtio_net_set_mrg_rx_bufs(n, qemu_get_be32(f));
  
 @@ -1170,6 +1176,22 @@ static int virtio_net_load(QEMUFile *f, void *opaque, 
 int version_id)
  }
  }
  
 +if (version_id >= 12) {
 +if (n->max_queues != qemu_get_be16(f)) {
 +error_report("virtio-net: different max_queues ");
 +return -1;
 +}
 +
 +n->curr_queues = qemu_get_be16(f);
 +for (i = 1; i < n->curr_queues; i++) {
 +n->vqs[i].tx_waiting = qemu_get_be32(f);
 +}
 +}
 +
 +virtio_net_set_queues(n);
 +/* Must do this again, since we may have more than one active queues. */
 s/queues/queue/

 Also I didn't understand why it's here.
 It seems that virtio has vm running callback,
 and that will invoke virtio_net_set_status after vm load.
 No?

True, will remove it next version.

Thanks

 +virtio_net_set_status(&n->vdev, n->status);
 +
  /* Find the first multicast entry in the saved MAC filter */
  for (i = 0; i < n->mac_table.in_use; i++) {
  if (n->mac_table.macs[i * ETH_ALEN] & 1) {
 @@ -1180,7 +1202,10 @@ static int virtio_net_load(QEMUFile *f, void *opaque, 
 int version_id)
  
  /* nc.link_down can't be migrated, so infer link_down according
   * to link status bit in n->status */
 -qemu_get_queue(n->nic)->link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
 +link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
 +for (i = 0; i < n->max_queues; i++) {
 +qemu_get_subqueue(n->nic, i)->link_down = link_down;
 +}
  
  return 0;
  }
 -- 
 1.7.1



Re: [PATCH 07/12] virtio: introduce virtio_queue_del()

2013-01-08 Thread Jason Wang
On 01/08/2013 03:14 PM, Michael S. Tsirkin wrote:
 On Fri, Dec 28, 2012 at 06:31:59PM +0800, Jason Wang wrote:
Some devices (such as virtio-net) need the ability to destroy or re-order their
virtqueues; this patch adds a helper to do this.

 Signed-off-by: Jason Wang jasowang
 Actually del_queue unlike what the subject says :)

Oh, yes, will correct this.

 ---
  hw/virtio.c |9 +
  hw/virtio.h |2 ++
  2 files changed, 11 insertions(+), 0 deletions(-)

 diff --git a/hw/virtio.c b/hw/virtio.c
 index f40a8c5..bc3c9c3 100644
 --- a/hw/virtio.c
 +++ b/hw/virtio.c
 @@ -700,6 +700,15 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int 
 queue_size,
  return &vdev->vq[i];
  }
  
 +void virtio_del_queue(VirtIODevice *vdev, int n)
 +{
 +if (n < 0 || n >= VIRTIO_PCI_QUEUE_MAX) {
 +abort();
 +}
 +
 +vdev->vq[n].vring.num = 0;
 +}
 +
  void virtio_irq(VirtQueue *vq)
  {
  trace_virtio_irq(vq);
 diff --git a/hw/virtio.h b/hw/virtio.h
 index 7c17f7b..f6cb0f9 100644
 --- a/hw/virtio.h
 +++ b/hw/virtio.h
 @@ -138,6 +138,8 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int 
 queue_size,
  void (*handle_output)(VirtIODevice *,
VirtQueue *));
  
 +void virtio_del_queue(VirtIODevice *vdev, int n);
 +
  void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
  unsigned int len);
  void virtqueue_flush(VirtQueue *vq, unsigned int count);
 -- 
 1.7.1



Re: [Qemu-devel] [PATCH 10/12] virtio-net: multiqueue support

2013-01-08 Thread Jason Wang
On 01/08/2013 05:07 PM, Wanlong Gao wrote:
 On 12/28/2012 06:32 PM, Jason Wang wrote:
 +} else if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
 +ret = -1;
 +} else {
 +ret = tap_detach(nc->peer);
 +}
 +
 +return ret;
 +}
 +
 +static void virtio_net_set_queues(VirtIONet *n)
 +{
 +int i;
 +
 +for (i = 0; i < n->max_queues; i++) {
 +if (i < n->curr_queues) {
 +assert(!peer_attach(n, i));
 +} else {
 +assert(!peer_detach(n, i));
 I got a assert here,
 qemu-system-x86_64: /work/git/qemu/hw/virtio-net.c:330: 
 virtio_net_set_queues: Assertion `!peer_detach(n, i)' failed.

 Any thoughts?

 Thanks,
 Wanlong Gao

Thanks for the testing, which steps or cases did you met this assertion,
migration, reboot or just changing the number of virtqueues?

 +}
 +}
 +}
 +
 +static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int 
 ctrl);
 +



Re: Installation of Windows 8 hangs with KVM

2013-01-08 Thread Stefan Pietsch
* Ren, Yongjie yongjie@intel.com [2013-01-07 09:38]:

 you met issue only for 32bit Win8 (not 64 bit Win8), right?
 I think it's the same issue as the below bug I reported.
 https://bugs.launchpad.net/qemu/+bug/1007269
 You can try with '-cpu coreduo' or '-cpu core2duo' in qemu-kvm command line.

Yes, I'm talking about 32bit Windows 8. The CPU does not support 64bit.
Booting with -cpu coreduo or core2duo no longer shows the 5D error, but
only a black screen after a few minutes.

I'm not able to do a successful boot of the installation ISO with KVM.



Re: [Qemu-devel] [PATCH 10/12] virtio-net: multiqueue support

2013-01-08 Thread Wanlong Gao
On 01/08/2013 05:29 PM, Jason Wang wrote:
 On 01/08/2013 05:07 PM, Wanlong Gao wrote:
 On 12/28/2012 06:32 PM, Jason Wang wrote:
 +} else if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
 +ret = -1;
 +} else {
 +ret = tap_detach(nc->peer);
 +}
 +
 +return ret;
 +}
 +
 +static void virtio_net_set_queues(VirtIONet *n)
 +{
 +int i;
 +
 +for (i = 0; i < n->max_queues; i++) {
 +if (i < n->curr_queues) {
 +assert(!peer_attach(n, i));
 +} else {
 +assert(!peer_detach(n, i));
 I got a assert here,
 qemu-system-x86_64: /work/git/qemu/hw/virtio-net.c:330: 
 virtio_net_set_queues: Assertion `!peer_detach(n, i)' failed.

 Any thoughts?

 Thanks,
 Wanlong Gao
 
 Thanks for the testing, which steps or cases did you met this assertion,
 migration, reboot or just changing the number of virtqueues?

I used 3.8-rc2 to test it again; I saw this tag has the multi-tap support.

I just can't start QEMU using -netdev tap,id=hostnet0,queues=2,fd=%d,fd=%d
-device
virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:ce:7b:29,bus=pci.0,addr=0x3

I pre-opened two tap fds; did I miss something?

Thanks,
Wanlong Gao

 
 +}
 +}
 +}
 +
 +static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int 
 ctrl);
 +
 
 



Re: [Qemu-devel] [PATCH 10/12] virtio-net: multiqueue support

2013-01-08 Thread Jason Wang
On 01/08/2013 05:49 PM, Wanlong Gao wrote:
 On 01/08/2013 05:29 PM, Jason Wang wrote:
 On 01/08/2013 05:07 PM, Wanlong Gao wrote:
 On 12/28/2012 06:32 PM, Jason Wang wrote:
 +} else if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
 +ret = -1;
 +} else {
 +ret = tap_detach(nc->peer);
 +}
 +
 +return ret;
 +}
 +
 +static void virtio_net_set_queues(VirtIONet *n)
 +{
 +int i;
 +
 +for (i = 0; i < n->max_queues; i++) {
 +if (i < n->curr_queues) {
 +assert(!peer_attach(n, i));
 +} else {
 +assert(!peer_detach(n, i));
 I got a assert here,
 qemu-system-x86_64: /work/git/qemu/hw/virtio-net.c:330: 
 virtio_net_set_queues: Assertion `!peer_detach(n, i)' failed.

 Any thoughts?

 Thanks,
 Wanlong Gao
 Thanks for the testing, which steps or cases did you met this assertion,
 migration, reboot or just changing the number of virtqueues?
 I use the 3.8-rc2 to test it again, I saw this tag has the multi-tap support.

 I just can't start the QEMU use  -netdev tap,id=hostnet0,queues=2,fd=%d,fd=%d 
 -device 
 virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:ce:7b:29,bus=pci.0,addr=0x3

 I pre-opened two tap fds, did I missing something?

Nothing missed :) It should work.

Could you please try not using fd=X and let qemu create the file
descriptors by itself? Btw, how did you create the two tap fds?

Thanks

 Thanks,
 Wanlong Gao

 +}
 +}
 +}
 +
 +static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int 
 ctrl);
 +





Re: [Qemu-devel] [PATCH 10/12] virtio-net: multiqueue support

2013-01-08 Thread Wanlong Gao
On 01/08/2013 05:29 PM, Jason Wang wrote:
 On 01/08/2013 05:07 PM, Wanlong Gao wrote:
 On 12/28/2012 06:32 PM, Jason Wang wrote:
 +} else if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
 +ret = -1;
 +} else {
 +ret = tap_detach(nc->peer);
 +}
 +
 +return ret;
 +}
 +
 +static void virtio_net_set_queues(VirtIONet *n)
 +{
 +int i;
 +
 +for (i = 0; i < n->max_queues; i++) {
 +if (i < n->curr_queues) {
 +assert(!peer_attach(n, i));
 +} else {
 +assert(!peer_detach(n, i));
 I got a assert here,
 qemu-system-x86_64: /work/git/qemu/hw/virtio-net.c:330: 
 virtio_net_set_queues: Assertion `!peer_detach(n, i)' failed.

 Any thoughts?

 Thanks,
 Wanlong Gao
 
 Thanks for the testing, which steps or cases did you met this assertion,
 migration, reboot or just changing the number of virtqueues?

It may be because my host doesn't support multi-tap; I'll try with the upstream
kernel again.

Thanks,
Wanlong Gao

 
 +}
 +}
 +}
 +
 +static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int 
 ctrl);
 +
 
 



Re: [Qemu-devel] [PATCH 10/12] virtio-net: multiqueue support

2013-01-08 Thread Wanlong Gao
On 01/08/2013 05:51 PM, Jason Wang wrote:
 On 01/08/2013 05:49 PM, Wanlong Gao wrote:
 On 01/08/2013 05:29 PM, Jason Wang wrote:
 On 01/08/2013 05:07 PM, Wanlong Gao wrote:
 On 12/28/2012 06:32 PM, Jason Wang wrote:
 +} else if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
 +ret = -1;
 +} else {
 +ret = tap_detach(nc->peer);
 +}
 +
 +return ret;
 +}
 +
 +static void virtio_net_set_queues(VirtIONet *n)
 +{
 +int i;
 +
 +for (i = 0; i < n->max_queues; i++) {
 +if (i < n->curr_queues) {
 +assert(!peer_attach(n, i));
 +} else {
 +assert(!peer_detach(n, i));
 I got a assert here,
 qemu-system-x86_64: /work/git/qemu/hw/virtio-net.c:330: 
 virtio_net_set_queues: Assertion `!peer_detach(n, i)' failed.

 Any thoughts?

 Thanks,
 Wanlong Gao
 Thanks for the testing, which steps or cases did you met this assertion,
 migration, reboot or just changing the number of virtqueues?
 I use the 3.8-rc2 to test it again, I saw this tag has the multi-tap support.

 I just can't start the QEMU use  -netdev 
 tap,id=hostnet0,queues=2,fd=%d,fd=%d -device 
 virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:ce:7b:29,bus=pci.0,addr=0x3

 I pre-opened two tap fds, did I missing something?
 
 Nothing missed :) It should work.
 
 Could you please try not use fd=X and let qemu to create the file
 descriptors by itself? Btw, how did you create the two tap fds?

Can it create descriptors itself? I get 
qemu-system-x86_64: -netdev tap,id=hostnet0,queues=2: Device 'tap' could not be 
initialized

I create the tap fd like this, and then dup() it to create the second and third fds, right?

int tap_fd = open("/dev/net/tun", O_RDWR);
int vhost_fd = open("/dev/vhost-net", O_RDWR);
char *tap_name = "tap";
char cmd[2048];
char brctl[256];
char netup[256];
struct ifreq ifr;
if (tap_fd < 0) {
printf("open tun device failed\n");
return -1;
}
if (vhost_fd < 0) {
printf("open vhost-net device failed\n");
return -1;
}
memset(&ifr, 0, sizeof(ifr));
memcpy(ifr.ifr_name, tap_name, sizeof(tap_name));
ifr.ifr_flags = IFF_TAP | IFF_NO_PI;

/*
 * setup tap net device
 */
if (ioctl(tap_fd, TUNSETIFF, &ifr) < 0) {
printf("setup tap net device failed\n");
return -1;
}

sprintf(brctl, "brctl addif virbr0 %s", tap_name);
sprintf(netup, "ifconfig %s up", tap_name);
system(brctl);
system(netup);

Thanks,
Wanlong Gao


 
 Thanks

 Thanks,
 Wanlong Gao

 +}
 +}
 +}
 +
 +static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int 
 ctrl);
 +


 
 



Re: [PATCH v8 2/3] x86, apicv: add virtual interrupt delivery support

2013-01-08 Thread Gleb Natapov
On Mon, Jan 07, 2013 at 07:32:39PM -0200, Marcelo Tosatti wrote:
 On Mon, Jan 07, 2013 at 07:48:43PM +0200, Gleb Natapov wrote:
   ioapic_write (or any other ioapic update)
   lock()
   perform update
   make_all_vcpus_request(KVM_REQ_UPDATE_EOI_BITMAP) (*)
   unlock()
   
   (*) Similarly to TLB flush.
   
   The advantage is that all work becomes vcpu local. The end result
   is much simpler code.
  What complexity will it remove?
 
 Synchronization between multiple CPUs (except the KVM_REQ_ bit
 processing, which is infrastructure shared by other parts of KVM).
 
Synchronization is just a lock around bitmap access. Can be replaced
with RCU if it turns to be performance problem.

 We agreed that performance is non issue here.
Yes, if the code is indeed simpler we can take the hit, although
recalculating the bitmap 255 times instead of once for -smp 255 looks a
little bit excessive; but I do not see considerable simplification (if
at all).

So as far as I understand you are proposing:

vcpu0 or io thread:   |vcpu1:
ioapic_write (or other ioapic update) |
 lock(exitbitmap) |
 if (on vcpu) |
   ioapic_update_my_eoi_exitmap() |
 make_all_vcpus_request(update)   |if (update requested)
  |  ioapic_update_my_eoi_exitmap()
 unlock(exitbitmap)   |

The current patch logic is this:

vcpu0 or io thread:   |  vcpu1:
ioapic_write (or other ioapic update) |
 lock(exitbitmap) |
 ioapic_update_all_eoi_exitmaps() |
 make request on each vcpu| 
 kick each vcpu   | if (update requested)
 unlock(exitbitmap)   |lock(exitbitmap)
  |load_exitbitmap()
  |unlock(exitbitmap)

If I described correctly what you are proposing I do not
see simplification since the bulk of the complexity is in the
ioapic_update_(my|all)_eoi_exitmap() and they will be the same in both
implementations. Actually I do see complication in your idea introduced
by the fact that the case when update is done from vcpu thread have to
be handled specially.

The proposed patch may be simplified further by using
make_all_vcpus_request_async(update) (*)
instead of making a request and kicking each vcpu individually. In fact
the way it is done now is buggy, since requests are made only for vcpus
with a bit set in their bitmask; if the bit is cleared no request is made,
so a vcpu can run with a stale bitmask.

(*) not exists yet as far as I see.
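
For illustration only, a minimal sketch of what such an asynchronous helper
might look like (hypothetical, not in the tree; built only on the existing
kvm_for_each_vcpu/kvm_make_request/kvm_vcpu_kick primitives):

/* Hypothetical: set req on every vcpu and kick it, without waiting for acks. */
static void make_all_vcpus_request_async(struct kvm *kvm, unsigned int req)
{
	int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_make_request(req, vcpu);
		kvm_vcpu_kick(vcpu);
	}
}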

--
Gleb.


Re: [Qemu-devel] [PATCH 10/12] virtio-net: multiqueue support

2013-01-08 Thread Jason Wang
On 01/08/2013 06:00 PM, Wanlong Gao wrote:
 On 01/08/2013 05:51 PM, Jason Wang wrote:
 On 01/08/2013 05:49 PM, Wanlong Gao wrote:
 On 01/08/2013 05:29 PM, Jason Wang wrote:
 On 01/08/2013 05:07 PM, Wanlong Gao wrote:
 On 12/28/2012 06:32 PM, Jason Wang wrote:
 +} else if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
 +ret = -1;
 +} else {
 +ret = tap_detach(nc->peer);
 +}
 +
 +return ret;
 +}
 +
 +static void virtio_net_set_queues(VirtIONet *n)
 +{
 +int i;
 +
 +for (i = 0; i < n->max_queues; i++) {
 +if (i < n->curr_queues) {
 +assert(!peer_attach(n, i));
 +} else {
 +assert(!peer_detach(n, i));
 I got a assert here,
 qemu-system-x86_64: /work/git/qemu/hw/virtio-net.c:330: 
 virtio_net_set_queues: Assertion `!peer_detach(n, i)' failed.

 Any thoughts?

 Thanks,
 Wanlong Gao
 Thanks for the testing, which steps or cases did you met this assertion,
 migration, reboot or just changing the number of virtqueues?
 I use the 3.8-rc2 to test it again, I saw this tag has the multi-tap 
 support.

 I just can't start the QEMU use  -netdev 
 tap,id=hostnet0,queues=2,fd=%d,fd=%d -device 
 virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:ce:7b:29,bus=pci.0,addr=0x3

 I pre-opened two tap fds, did I missing something?
 Nothing missed :) It should work.

 Could you please try not use fd=X and let qemu to create the file
 descriptors by itself? Btw, how did you create the two tap fds?
 Can it create descriptors itself? I get 
 qemu-system-x86_64: -netdev tap,id=hostnet0,queues=2: Device 'tap' could not 
 be initialized

You need to prepare an ifup script, which defaults to /etc/qemu-ifup (like the
following). Or you may try to add a script=no after it:

#!/bin/sh

switch=kvmbr0

/sbin/ifconfig $1 0.0.0.0 up
/usr/sbin/brctl addif $switch $1
/usr/sbin/brctl stp $switch off

This will let qemu create a tap fd itself and connect it to
a port of the bridge called kvmbr0.

 I create the tap fd like this, and dup create the second fd, third fd, right?

The second and third fds should also be created with TUNSETIFF with the same
tap_name. Btw, you need to specify the IFF_MULTI_QUEUE flag to tell
the kernel you want to create a multiqueue tap device, otherwise the
second and third calls to TUNSETIFF will fail.
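
For illustration, a rough (untested) sketch of opening one fd per queue this
way; the helper name is made up, and IFF_MULTI_QUEUE needs 3.8-era kernel
headers:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/if.h>
#include <linux/if_tun.h>

/* Open one queue fd of a multiqueue tap device called "name". */
static int open_mq_tap_fd(const char *name)
{
    struct ifreq ifr;
    int fd = open("/dev/net/tun", O_RDWR);

    if (fd < 0)
        return -1;
    memset(&ifr, 0, sizeof(ifr));
    strncpy(ifr.ifr_name, name, IFNAMSIZ - 1);
    ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_MULTI_QUEUE;
    if (ioctl(fd, TUNSETIFF, &ifr) < 0) {
        close(fd);
        return -1;
    }
    return fd;
}

/* e.g. fd0 = open_mq_tap_fd("tap0"); fd1 = open_mq_tap_fd("tap0"); */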

Thanks

   int tap_fd = open("/dev/net/tun", O_RDWR);
   int vhost_fd = open("/dev/vhost-net", O_RDWR);
   char *tap_name = "tap";
   char cmd[2048];
   char brctl[256];
   char netup[256];
   struct ifreq ifr;
   if (tap_fd < 0) {
   printf("open tun device failed\n");
   return -1;
   }
   if (vhost_fd < 0) {
   printf("open vhost-net device failed\n");
   return -1;
   }
   memset(&ifr, 0, sizeof(ifr));
   memcpy(ifr.ifr_name, tap_name, sizeof(tap_name));
   ifr.ifr_flags = IFF_TAP | IFF_NO_PI;

   /*
    * setup tap net device
    */
   if (ioctl(tap_fd, TUNSETIFF, &ifr) < 0) {
   printf("setup tap net device failed\n");
   return -1;
   }

   sprintf(brctl, "brctl addif virbr0 %s", tap_name);
   sprintf(netup, "ifconfig %s up", tap_name);
   system(brctl);
   system(netup);

 Thanks,
 Wanlong Gao


 Thanks
 Thanks,
 Wanlong Gao

 +}
 +}
 +}
 +
 +static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int 
 ctrl);
 +




Re: [PATCH 0/7] KVM: Alleviate mmu_lock hold time when we start dirty logging

2013-01-08 Thread Takuya Yoshikawa
On Mon, 7 Jan 2013 18:36:42 -0200
Marcelo Tosatti mtosa...@redhat.com wrote:

 Looks good, except patch 1 - 
 
 a) don't understand why it is necessary and 

What's really necessary is to make sure that we don't call the function
for a deleted slot.  My explanation was wrong.

 b) not confident it's safe - isn't clearing necessary for KVM_SET_MEMORY
 instances other than
 
 !(old.flags & LOG_DIRTY) && (new.flags & LOG_DIRTY)

I think flushing shadows should be enough for other cases, e.g. moving a slot.

But I've changed the condition (see v2) to make it easier to understand:
  npages && LOG_DIRTY

Since remove_write_access() is for dirty logging, this condition should be safe.

Thanks,
Takuya


[PATCH 0/7 -v2] KVM: Alleviate mmu_lock hold time when we start dirty logging

2013-01-08 Thread Takuya Yoshikawa
Changelog v1-v2:
  The condition in patch 1 was changed like this:
    npages && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)

This patch set makes kvm_mmu_slot_remove_write_access() rmap based and
adds conditional rescheduling to it.

The motivation for this change is of course to reduce the mmu_lock hold
time when we start dirty logging for a large memory slot.  You may not
see the problem if you just give 8GB or less of the memory to the guest
with THP enabled on the host -- this is for the worst case.

Takuya Yoshikawa (7):
  KVM: Write protect the updated slot only when dirty logging is enabled
  KVM: MMU: Remove unused parameter level from __rmap_write_protect()
  KVM: MMU: Make kvm_mmu_slot_remove_write_access() rmap based
  KVM: Remove unused slot_bitmap from kvm_mmu_page
  KVM: Make kvm_mmu_change_mmu_pages() take mmu_lock by itself
  KVM: Make kvm_mmu_slot_remove_write_access() take mmu_lock by itself
  KVM: MMU: Conditionally reschedule when kvm_mmu_slot_remove_write_access() 
takes a long time

 Documentation/virtual/kvm/mmu.txt |7 
 arch/x86/include/asm/kvm_host.h   |5 ---
 arch/x86/kvm/mmu.c|   56 +++-
 arch/x86/kvm/x86.c|   12 ---
 virt/kvm/kvm_main.c   |1 -
 5 files changed, 37 insertions(+), 44 deletions(-)

-- 
1.7.5.4



[PATCH 1/7] KVM: Write protect the updated slot only when dirty logging is enabled

2013-01-08 Thread Takuya Yoshikawa
Calling kvm_mmu_slot_remove_write_access() for a deleted slot does
nothing but search for non-existent mmu pages which have mappings to
that deleted memory; this is safe but a waste of time.

Since we want to make the function rmap based in a later patch, in a
manner which makes it unsafe to be called for a deleted slot, we make
the caller check that the slot is non-zero and is being dirty logged.

Signed-off-by: Takuya Yoshikawa yoshikawa_takuya...@lab.ntt.co.jp
---
 arch/x86/kvm/x86.c  |8 +++-
 virt/kvm/kvm_main.c |1 -
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1c9c834..add5e48 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6897,7 +6897,13 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
	spin_lock(&kvm->mmu_lock);
	if (nr_mmu_pages)
		kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
-	kvm_mmu_slot_remove_write_access(kvm, mem->slot);
+	/*
+	 * Write protect all pages for dirty logging.
+	 * Existing largepage mappings are destroyed here and new ones will
+	 * not be created until the end of the logging.
+	 */
+	if (npages && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
+		kvm_mmu_slot_remove_write_access(kvm, mem->slot);
	spin_unlock(&kvm->mmu_lock);
/*
 * If memory slot is created, or moved, we need to clear all
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e45c20c..f689a6d 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -817,7 +817,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
	if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
		if (kvm_create_dirty_bitmap(&new) < 0)
goto out_free;
-   /* destroy any largepage mappings for dirty tracking */
}
 
if (!npages || base_gfn != old.base_gfn) {
-- 
1.7.5.4



[PATCH 2/7] KVM: MMU: Remove unused parameter level from __rmap_write_protect()

2013-01-08 Thread Takuya Yoshikawa
No longer need to care about the mapping level in this function.

Signed-off-by: Takuya Yoshikawa yoshikawa_takuya...@lab.ntt.co.jp
---
 arch/x86/kvm/mmu.c |6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 01d7c2a..bee3509 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1142,7 +1142,7 @@ spte_write_protect(struct kvm *kvm, u64 *sptep, bool 
*flush, bool pt_protect)
 }
 
 static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
-int level, bool pt_protect)
+bool pt_protect)
 {
u64 *sptep;
struct rmap_iterator iter;
@@ -1180,7 +1180,7 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
while (mask) {
		rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
  PT_PAGE_TABLE_LEVEL, slot);
-   __rmap_write_protect(kvm, rmapp, PT_PAGE_TABLE_LEVEL, false);
+   __rmap_write_protect(kvm, rmapp, false);
 
/* clear the first set bit */
		mask &= mask - 1;
@@ -1199,7 +1199,7 @@ static bool rmap_write_protect(struct kvm *kvm, u64 gfn)
for (i = PT_PAGE_TABLE_LEVEL;
	     i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
rmapp = __gfn_to_rmap(gfn, i, slot);
-   write_protected |= __rmap_write_protect(kvm, rmapp, i, true);
+   write_protected |= __rmap_write_protect(kvm, rmapp, true);
}
 
return write_protected;
-- 
1.7.5.4



[PATCH 3/7] KVM: MMU: Make kvm_mmu_slot_remove_write_access() rmap based

2013-01-08 Thread Takuya Yoshikawa
This makes it possible to release mmu_lock and reschedule conditionally
in a later patch.  Although this may increase the time needed to protect
the whole slot when we start dirty logging, the kernel should not allow
the userspace to trigger something that will hold a spinlock for such a
long time as tens of milliseconds: actually there is no limit since it
is roughly proportional to the number of guest pages.

Another point to note is that this patch removes the only user of
slot_bitmap which will cause some problems when we increase the number
of slots further.

Signed-off-by: Takuya Yoshikawa yoshikawa_takuya...@lab.ntt.co.jp
---
 arch/x86/kvm/mmu.c |   28 +++-
 1 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index bee3509..b4d4fd1 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4198,25 +4198,27 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu)
 
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
 {
-   struct kvm_mmu_page *sp;
-   bool flush = false;
+   struct kvm_memory_slot *memslot;
+   gfn_t last_gfn;
+   int i;
 
-	list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) {
-		int i;
-		u64 *pt;
+	memslot = id_to_memslot(kvm->memslots, slot);
+	last_gfn = memslot->base_gfn + memslot->npages - 1;
 
-		if (!test_bit(slot, sp->slot_bitmap))
-			continue;
+	for (i = PT_PAGE_TABLE_LEVEL;
+	     i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
+		unsigned long *rmapp;
+		unsigned long last_index, index;
 
-		pt = sp->spt;
-		for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
-			if (!is_shadow_present_pte(pt[i]) ||
-			      !is_last_spte(pt[i], sp->role.level))
-				continue;
+		rmapp = memslot->arch.rmap[i - PT_PAGE_TABLE_LEVEL];
+		last_index = gfn_to_index(last_gfn, memslot->base_gfn, i);
 
-			spte_write_protect(kvm, &pt[i], &flush, false);
+		for (index = 0; index <= last_index; ++index, ++rmapp) {
+			if (*rmapp)
+				__rmap_write_protect(kvm, rmapp, false);
}
}
+
kvm_flush_remote_tlbs(kvm);
 }
 
-- 
1.7.5.4



[PATCH 4/7] KVM: Remove unused slot_bitmap from kvm_mmu_page

2013-01-08 Thread Takuya Yoshikawa
Not needed any more.

Signed-off-by: Takuya Yoshikawa yoshikawa_takuya...@lab.ntt.co.jp
---
 Documentation/virtual/kvm/mmu.txt |7 ---
 arch/x86/include/asm/kvm_host.h   |5 -
 arch/x86/kvm/mmu.c|   10 --
 3 files changed, 0 insertions(+), 22 deletions(-)

diff --git a/Documentation/virtual/kvm/mmu.txt 
b/Documentation/virtual/kvm/mmu.txt
index fa5f1db..43fcb76 100644
--- a/Documentation/virtual/kvm/mmu.txt
+++ b/Documentation/virtual/kvm/mmu.txt
@@ -187,13 +187,6 @@ Shadow pages contain the following information:
 perform a reverse map from a pte to a gfn. When role.direct is set, any
 element of this array can be calculated from the gfn field when used, in
 this case, the array of gfns is not allocated. See role.direct and gfn.
-  slot_bitmap:
-A bitmap containing one bit per memory slot.  If the page contains a pte
-mapping a page from memory slot n, then bit n of slot_bitmap will be set
-(if a page is aliased among several slots, then it is not guaranteed that
-all slots will be marked).
-Used during dirty logging to avoid scanning a shadow page if none if its
-pages need tracking.
   root_count:
 A counter keeping track of how many hardware registers (guest cr3 or
 pdptrs) are now pointing at the page.  While this counter is nonzero, the
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c431b33..f75e1fe 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -219,11 +219,6 @@ struct kvm_mmu_page {
u64 *spt;
/* hold the gfn of each spte inside spt */
gfn_t *gfns;
-   /*
-* One bit set per slot which has memory
-* in this shadow page.
-*/
-   DECLARE_BITMAP(slot_bitmap, KVM_MEM_SLOTS_NUM);
bool unsync;
int root_count;  /* Currently serving as active root */
unsigned int unsync_children;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index b4d4fd1..bb964b3 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1522,7 +1522,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct 
kvm_vcpu *vcpu,
	sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
	list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
-	bitmap_zero(sp->slot_bitmap, KVM_MEM_SLOTS_NUM);
	sp->parent_ptes = 0;
	mmu_page_add_parent_pte(vcpu, sp, parent_pte);
	kvm_mod_used_mmu_pages(vcpu->kvm, +1);
@@ -2183,14 +2182,6 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page);
 
-static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
-{
-   int slot = memslot_id(kvm, gfn);
-   struct kvm_mmu_page *sp = page_header(__pa(pte));
-
-	__set_bit(slot, sp->slot_bitmap);
-}
-
 /*
  * The function is based on mtrr_type_lookup() in
  * arch/x86/kernel/cpu/mtrr/generic.c
@@ -2497,7 +2488,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 
*sptep,
	++vcpu->kvm->stat.lpages;
 
	if (is_shadow_present_pte(*sptep)) {
-		page_header_update_slot(vcpu->kvm, sptep, gfn);
		if (!was_rmapped) {
			rmap_count = rmap_add(vcpu, sptep, gfn);
			if (rmap_count > RMAP_RECYCLE_THRESHOLD)
-- 
1.7.5.4



[PATCH 5/7] KVM: Make kvm_mmu_change_mmu_pages() take mmu_lock by itself

2013-01-08 Thread Takuya Yoshikawa
No reason to make callers take mmu_lock since we do not need to protect
kvm_mmu_change_mmu_pages() and kvm_mmu_slot_remove_write_access()
together by mmu_lock in kvm_arch_commit_memory_region(): the former
calls kvm_mmu_commit_zap_page() and flushes TLBs by itself.

Note: we do not need to protect kvm-arch.n_requested_mmu_pages by
mmu_lock as can be seen from the fact that it is read locklessly.

Signed-off-by: Takuya Yoshikawa yoshikawa_takuya...@lab.ntt.co.jp
---
 arch/x86/kvm/mmu.c |4 
 arch/x86/kvm/x86.c |9 -
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index bb964b3..fc7d84a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2143,6 +2143,8 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned 
int goal_nr_mmu_pages)
 * change the value
 */
 
+	spin_lock(&kvm->mmu_lock);
+
	if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) {
		while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages &&
		       !list_empty(&kvm->arch.active_mmu_pages)) {
@@ -2157,6 +2159,8 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
	}
 
	kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages;
+
+	spin_unlock(&kvm->mmu_lock);
 }
 
 int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index add5e48..080bbdc 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3270,12 +3270,10 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm 
*kvm,
return -EINVAL;
 
	mutex_lock(&kvm->slots_lock);
-	spin_lock(&kvm->mmu_lock);
 
	kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
	kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
 
-	spin_unlock(&kvm->mmu_lock);
	mutex_unlock(&kvm->slots_lock);
return 0;
 }
@@ -6894,7 +6892,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
	if (!kvm->arch.n_requested_mmu_pages)
		nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
 
-	spin_lock(&kvm->mmu_lock);
if (nr_mmu_pages)
kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
/*
@@ -6902,9 +6899,11 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 * Existing largepage mappings are destroyed here and new ones will
 * not be created until the end of the logging.
 */
-	if (npages && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
+	if (npages && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) {
+		spin_lock(&kvm->mmu_lock);
		kvm_mmu_slot_remove_write_access(kvm, mem->slot);
-	spin_unlock(&kvm->mmu_lock);
+		spin_unlock(&kvm->mmu_lock);
+   }
/*
 * If memory slot is created, or moved, we need to clear all
 * mmio sptes.
-- 
1.7.5.4



[PATCH 6/7] KVM: Make kvm_mmu_slot_remove_write_access() take mmu_lock by itself

2013-01-08 Thread Takuya Yoshikawa
Better to place mmu_lock handling and TLB flushing code together since
this is a self-contained function.

Signed-off-by: Takuya Yoshikawa yoshikawa_takuya...@lab.ntt.co.jp
---
 arch/x86/kvm/mmu.c |3 +++
 arch/x86/kvm/x86.c |5 +
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index fc7d84a..b7a1235 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4199,6 +4199,8 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, 
int slot)
	memslot = id_to_memslot(kvm->memslots, slot);
	last_gfn = memslot->base_gfn + memslot->npages - 1;
 
+	spin_lock(&kvm->mmu_lock);
+
	for (i = PT_PAGE_TABLE_LEVEL;
	     i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
unsigned long *rmapp;
@@ -4214,6 +4216,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, 
int slot)
}
 
kvm_flush_remote_tlbs(kvm);
+	spin_unlock(&kvm->mmu_lock);
 }
 
 void kvm_mmu_zap_all(struct kvm *kvm)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 080bbdc..5483228 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6899,11 +6899,8 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 * Existing largepage mappings are destroyed here and new ones will
 * not be created until the end of the logging.
 */
-	if (npages && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) {
-		spin_lock(&kvm->mmu_lock);
+	if (npages && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
		kvm_mmu_slot_remove_write_access(kvm, mem->slot);
-	spin_unlock(&kvm->mmu_lock);
-   }
/*
 * If memory slot is created, or moved, we need to clear all
 * mmio sptes.
-- 
1.7.5.4



[PATCH 7/7] KVM: MMU: Conditionally reschedule when kvm_mmu_slot_remove_write_access() takes a long time

2013-01-08 Thread Takuya Yoshikawa
If the userspace starts dirty logging for a large slot, say 64GB of
memory, kvm_mmu_slot_remove_write_access() needs to hold mmu_lock for
a long time such as tens of milliseconds.  This patch controls the lock
hold time by asking the scheduler if we need to reschedule for others.

One penalty for this is that we need to flush TLBs before releasing
mmu_lock.  But since holding mmu_lock for a long time does affect not
only the guest, vCPU threads in other words, but also the host as a
whole, we should pay for that.

In practice, the cost will not be so high because we can protect a fair
amount of memory before being rescheduled: on my test environment,
cond_resched_lock() was called only once for protecting 12GB of memory
even without THP.  We can also revisit Avi's unlocked TLB flush work
later for completely suppressing extra TLB flushes if needed.

Signed-off-by: Takuya Yoshikawa yoshikawa_takuya...@lab.ntt.co.jp
---
 arch/x86/kvm/mmu.c |5 +
 1 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index b7a1235..a32e8cf 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4212,6 +4212,11 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, 
int slot)
		for (index = 0; index <= last_index; ++index, ++rmapp) {
if (*rmapp)
__rmap_write_protect(kvm, rmapp, false);
+
+			if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
+				kvm_flush_remote_tlbs(kvm);
+				cond_resched_lock(&kvm->mmu_lock);
+   }
}
}
 
-- 
1.7.5.4



Re: KVM: VMX: fix incorrect cached cpl value with real/v8086 modes (v3)

2013-01-08 Thread Gleb Natapov
On Mon, Jan 07, 2013 at 07:27:06PM -0200, Marcelo Tosatti wrote:
 
 CPL is always 0 when in real mode, and always 3 when virtual 8086 mode.
 
 Using values other than those can cause failures on operations that
 check CPL.
 
 Signed-off-by: Marcelo Tosatti mtosa...@redhat.com
 
Reviewed-by: Gleb Natapov g...@redhat.com

 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
 index 55dfc37..dd2a85c 100644
 --- a/arch/x86/kvm/vmx.c
 +++ b/arch/x86/kvm/vmx.c
 @@ -1696,7 +1696,6 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu 
 *vcpu)
  static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
  {
  	__set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
 -	__clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
  	to_vmx(vcpu)->rflags = rflags;
  	if (to_vmx(vcpu)->rmode.vm86_active) {
  		to_vmx(vcpu)->rmode.save_rflags = rflags;
 @@ -3110,7 +3109,6 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned 
 long cr0)
   vmcs_writel(CR0_READ_SHADOW, cr0);
   vmcs_writel(GUEST_CR0, hw_cr0);
  	vcpu->arch.cr0 = cr0;
 -	__clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
  }
  
  static u64 construct_eptp(unsigned long root_hpa)
 @@ -3220,8 +3218,10 @@ static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, 
 int seg)
   return vmx_read_guest_seg_base(to_vmx(vcpu), seg);
  }
  
 -static int __vmx_get_cpl(struct kvm_vcpu *vcpu)
 +static int vmx_get_cpl(struct kvm_vcpu *vcpu)
  {
 + struct vcpu_vmx *vmx = to_vmx(vcpu);
 +
   if (!is_protmode(vcpu))
   return 0;
  
 @@ -3229,13 +3229,6 @@ static int __vmx_get_cpl(struct kvm_vcpu *vcpu)
	    && (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */
   return 3;
  
 -	return vmx_read_guest_seg_selector(to_vmx(vcpu), VCPU_SREG_CS) & 3;
 -}
 -
 -static int vmx_get_cpl(struct kvm_vcpu *vcpu)
 -{
 - struct vcpu_vmx *vmx = to_vmx(vcpu);
 -
   /*
	 * If we enter real mode with cs.sel & 3 != 0, the normal CPL calculations
* fail; use the cache instead.
 @@ -3246,7 +3239,7 @@ static int vmx_get_cpl(struct kvm_vcpu *vcpu)
  
  	if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) {
  		__set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
 -		vmx->cpl = __vmx_get_cpl(vcpu);
 +		vmx->cpl = vmx_read_guest_seg_selector(vmx, VCPU_SREG_CS) & 3;
  	}
  
  	return vmx->cpl;

--
Gleb.


Re: [Qemu-devel] [PATCH 10/12] virtio-net: multiqueue support

2013-01-08 Thread Wanlong Gao
On 01/08/2013 06:14 PM, Jason Wang wrote:
 On 01/08/2013 06:00 PM, Wanlong Gao wrote:
 On 01/08/2013 05:51 PM, Jason Wang wrote:
 On 01/08/2013 05:49 PM, Wanlong Gao wrote:
 On 01/08/2013 05:29 PM, Jason Wang wrote:
 On 01/08/2013 05:07 PM, Wanlong Gao wrote:
 On 12/28/2012 06:32 PM, Jason Wang wrote:
 +} else if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
 +ret = -1;
 +} else {
 +ret = tap_detach(nc->peer);
 +}
 +
 +return ret;
 +}
 +
 +static void virtio_net_set_queues(VirtIONet *n)
 +{
 +int i;
 +
 +for (i = 0; i < n->max_queues; i++) {
 +if (i < n->curr_queues) {
 +assert(!peer_attach(n, i));
 +} else {
 +assert(!peer_detach(n, i));
 I got a assert here,
 qemu-system-x86_64: /work/git/qemu/hw/virtio-net.c:330: 
 virtio_net_set_queues: Assertion `!peer_detach(n, i)' failed.

 Any thoughts?

 Thanks,
 Wanlong Gao
 Thanks for the testing, which steps or cases did you met this assertion,
 migration, reboot or just changing the number of virtqueues?
 I use the 3.8-rc2 to test it again, I saw this tag has the multi-tap 
 support.

 I just can't start the QEMU use  -netdev 
 tap,id=hostnet0,queues=2,fd=%d,fd=%d -device 
 virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:ce:7b:29,bus=pci.0,addr=0x3

 I pre-opened two tap fds, did I missing something?
 Nothing missed :) It should work.

 Could you please try not use fd=X and let qemu to create the file
 descriptors by itself? Btw, how did you create the two tap fds?
 Can it create descriptors itself? I get 
 qemu-system-x86_64: -netdev tap,id=hostnet0,queues=2: Device 'tap' could not 
 be initialized
 
 You need prepare an ifup script which default at /etc/qemu-ifup (like
 following). Or you may try to add a script=no after:
 
 #!/bin/sh
 
 switch=kvmbr0
 
 /sbin/ifconfig $1 0.0.0.0 up
 /usr/sbin/brctl addif $switch $1
 /usr/sbin/brctl stp $switch off
 
 This will let qemu create a tap fd itself and make it to be connected to
  a port of the bridge called kvmbr0.

 I create the tap fd like this, and dup create the second fd, third fd, right?
 
 The second and third fd should be created with TUNSETIFF with the same
 tap_name also. Btw, you need to specify a IFF_MULTI_QUEUE flag to tell
 the kernel you want to create a multiqueue tap device, otherwise the
 second and third calling of TUNSETIFF will fail.

Thank you for teaching me, I'll try it tomorrow.

Regards,
Wanlong Gao

 
 Thanks

  int tap_fd = open("/dev/net/tun", O_RDWR);
  int vhost_fd = open("/dev/vhost-net", O_RDWR);
  char *tap_name = "tap";
  char cmd[2048];
  char brctl[256];
  char netup[256];
  struct ifreq ifr;
  if (tap_fd < 0) {
  printf("open tun device failed\n");
  return -1;
  }
  if (vhost_fd < 0) {
  printf("open vhost-net device failed\n");
  return -1;
  }
  memset(&ifr, 0, sizeof(ifr));
  memcpy(ifr.ifr_name, tap_name, sizeof(tap_name));
  ifr.ifr_flags = IFF_TAP | IFF_NO_PI;

  /*
   * setup tap net device
   */
  if (ioctl(tap_fd, TUNSETIFF, &ifr) < 0) {
  printf("setup tap net device failed\n");
  return -1;
  }

  sprintf(brctl, "brctl addif virbr0 %s", tap_name);
  sprintf(netup, "ifconfig %s up", tap_name);
  system(brctl);
  system(netup);

 Thanks,
 Wanlong Gao


 Thanks
 Thanks,
 Wanlong Gao

 +}
 +}
 +}
 +
 +static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, 
 int ctrl);
 +

 
 



Re: [PATCH v3 0/7] Streamline arithmetic instruction emulation

2013-01-08 Thread Gleb Natapov
On Fri, Jan 04, 2013 at 04:18:47PM +0200, Avi Kivity wrote:
 The current arithmetic instruction emulation is fairly clumsy: after
 decode, each instruction gets a switch (size), and for every size
 we fetch the operands, prepare flags, emulate the instruction, then store
 back the flags and operands.
 
 This patchset simplifies things by moving everything into common code
 except the instruction itself.  All the pre- and post- processing is
 coded just once.  The per-instrution code looks like:
 
   add %bl, %al
   ret
 
   add %bx, %ax
   ret
 
   add %ebx, %eax
   ret
 
   add %rbx, %rax
   ret
 
 The savings in size, for the ten instructions converted in this patchset,
 are fairly large:
 
     text    data     bss     dec     hex filename
    63724       0       0   63724    f8ec arch/x86/kvm/emulate.o.before
    61268       0       0   61268    ef54 arch/x86/kvm/emulate.o.after
 
 - around 2500 bytes.
 
 v3: fix reversed operand order in 2-operand macro
 
 v2: rebased
 
Acked-by: Gleb Natapov g...@redhat.com

--
Gleb.


Re: [PATCH 1/7] KVM: Write protect the updated slot only when we start dirty logging

2013-01-08 Thread Gleb Natapov
On Mon, Jan 07, 2013 at 06:11:46PM -0200, Marcelo Tosatti wrote:
 On Tue, Dec 18, 2012 at 04:26:47PM +0900, Takuya Yoshikawa wrote:
  This is needed to make kvm_mmu_slot_remove_write_access() rmap based:
  otherwise we may end up using invalid rmap's.
  
  Signed-off-by: Takuya Yoshikawa yoshikawa_takuya...@lab.ntt.co.jp
 
 Why? memslot-arch.rmap[] has been properly allocated at this point.
 
FWIW a long time ago in a galaxy far, far away there was a check for
KVM_MEM_LOG_DIRTY_PAGES before call to kvm_mmu_slot_remove_write_access(),
but it was removed by 90cb0529dd230548a7, as far as I can tell, accidentally.

  ---
   arch/x86/kvm/x86.c  |9 -
   virt/kvm/kvm_main.c |1 -
   2 files changed, 8 insertions(+), 2 deletions(-)
  
  diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
  index 1c9c834..9451efa 100644
  --- a/arch/x86/kvm/x86.c
  +++ b/arch/x86/kvm/x86.c
  @@ -6897,7 +6897,14 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
	spin_lock(&kvm->mmu_lock);
	if (nr_mmu_pages)
		kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
-	kvm_mmu_slot_remove_write_access(kvm, mem->slot);
  +   /*
  +* Write protect all pages for dirty logging.
  +* Existing largepage mappings are destroyed here and new ones will
  +* not be created until the end of the logging.
  +*/
+	if ((mem->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
+	    !(old.flags & KVM_MEM_LOG_DIRTY_PAGES))
+		kvm_mmu_slot_remove_write_access(kvm, mem->slot);
	spin_unlock(&kvm->mmu_lock);
  /*
   * If memory slot is created, or moved, we need to clear all
  diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
  index bd31096..0ef5daa 100644
  --- a/virt/kvm/kvm_main.c
  +++ b/virt/kvm/kvm_main.c
  @@ -805,7 +805,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
	if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
		if (kvm_create_dirty_bitmap(&new) < 0)
  goto out_free;
  -   /* destroy any largepage mappings for dirty tracking */
  }
   
  if (!npages || base_gfn != old.base_gfn) {
  -- 
  1.7.5.4
  

--
Gleb.


[PATCH] KVM: trace: Fix exit decoding.

2013-01-08 Thread Cornelia Huck
trace_kvm_userspace_exit has been missing the KVM_EXIT_WATCHDOG exit.

CC: Bharat Bhushan r65...@freescale.com
Signed-off-by: Cornelia Huck cornelia.h...@de.ibm.com
---
 include/trace/events/kvm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index a23f47c..19911dd 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -14,7 +14,7 @@
ERSN(SHUTDOWN), ERSN(FAIL_ENTRY), ERSN(INTR), ERSN(SET_TPR),\
ERSN(TPR_ACCESS), ERSN(S390_SIEIC), ERSN(S390_RESET), ERSN(DCR),\
ERSN(NMI), ERSN(INTERNAL_ERROR), ERSN(OSI), ERSN(PAPR_HCALL),   \
-   ERSN(S390_UCONTROL), ERSN(S390_TSCH)
+   ERSN(S390_UCONTROL), ERSN(WATCHDOG), ERSN(S390_TSCH)
 
 TRACE_EVENT(kvm_userspace_exit,
TP_PROTO(__u32 reason, int errno),
-- 
1.7.12.4



RE: [PATCH v8 2/3] x86, apicv: add virtual interrupt delivery support

2013-01-08 Thread Zhang, Yang Z
Gleb Natapov wrote on 2013-01-08:
 On Mon, Jan 07, 2013 at 07:32:39PM -0200, Marcelo Tosatti wrote:
 On Mon, Jan 07, 2013 at 07:48:43PM +0200, Gleb Natapov wrote:
 ioapic_write (or any other ioapic update)
 lock()
 perform update
 make_all_vcpus_request(KVM_REQ_UPDATE_EOI_BITMAP) (*)
 unlock()
 
 (*) Similarly to TLB flush.
 
 The advantage is that all work becomes vcpu local. The end result
 is much simpler code.
 What complexity will it remove?
 
 Synchronization between multiple CPUs (except the KVM_REQ_ bit
 processing, which is infrastructure shared by other parts of KVM).
 
 Synchronization is just a lock around bitmap access. Can be replaced
 with RCU if it turns to be performance problem.
 
 We agreed that performance is non issue here.
 Yes, if the code is indeed simpler we can take the hit, although
 recalculating bitmap 255 times instead of one for -smp 255 looks like a
 little bit excessive, but I do not see considerable simplification (if
 at all).
 
 So as far as I understand you are proposing:
 
 vcpu0 or io thread:   |vcpu1:
 ioapic_write (or other ioapic update) |
  lock(exitbitmap) |
  if (on vcpu) |
ioapic_update_my_eoi_exitmap() |
  make_all_vcpus_request(update)   |if (update requested)
   |
 ioapic_update_my_eoi_exitmap()
  unlock(exitbitmap)   |
 The current patch logic is this:
 
 vcpu0 or io thread:   |  vcpu1:
 ioapic_write (or other ioapic update) |
  lock(exitbitmap) |
  ioapic_update_all_eoi_exitmaps() |
  make request on each vcpu|
  kick each vcpu   | if (update requested)
  unlock(exitbitmap)   |lock(exitbitmap)
   |load_exitbitmap()
   |unlock(exitbitmap)
 If I described correctly what you are proposing I do not
 see simplification since the bulk of the complexity is in the
 ioapic_update_(my|all)_eoi_exitmap() and they will be the same in both
 implementations. Actually I do see complication in your idea introduced
 by the fact that the case when update is done from vcpu thread have to
 be handled specially.
 
 The proposed patch may be simplified further by
 make_all_vcpus_request_async(update)(*) instead of making request and
 kicking each vcpu individually. In fact the way it is done now is buggy
 since requests are made only for vcpus with bit set in their bitmask,
 but if bit is cleared request is not made so vcpu can run with stale
 bitmask.
ok, how about the follow logic:
ioapic_write()
lock()
clear_eoi_exitmap_on_all_vcpus()
perform update(no make request)
make_all_vcpus_request(like tlb flush)
unlock()

Best regards,
Yang



Re: [PATCH v8 2/3] x86, apicv: add virtual interrupt delivery support

2013-01-08 Thread Marcelo Tosatti
On Tue, Jan 08, 2013 at 12:43:22AM +, Zhang, Yang Z wrote:
 Marcelo Tosatti wrote on 2013-01-08:
  On Mon, Jan 07, 2013 at 07:48:43PM +0200, Gleb Natapov wrote:
  ioapic_write (or any other ioapic update)
  lock()
  perform update
  make_all_vcpus_request(KVM_REQ_UPDATE_EOI_BITMAP) (*)
  unlock()
  
  (*) Similarly to TLB flush.
  
  The advantage is that all work becomes vcpu local. The end result
  is much simpler code.
  What complexity will it remove?
  
  Synchronization between multiple CPUs (except the KVM_REQ_ bit
  processing, which is infrastructure shared by other parts of KVM).
  
  We agreed that performance is non issue here.
 The current logic is this:
 ioapic_write
 lock()
 perform update
 make request on each vcpu
 kick each vcpu
 unlock()
 
 The only difference is the way to make the request. And the complex part is 
 performing update. With your suggestion, we still need to do the update. Why 
 you think it is much simpler?

The update should be local, because there is no reason to make it
remote.

   kvm_for_each_vcpu(index, vcpu, kvm)
   kvm_x86_ops->update_exitmap_start(vcpu);

   for (index = 0; index < IOAPIC_NUM_PINS; index++) {
   e = &ioapic->redirtbl[index];
   if (!e->fields.mask)
   ioapic_update_eoi_exitmap_one(ioapic, index);
   }

   kvm_for_each_vcpu(index, vcpu, kvm) {
   kvm_x86_ops->update_exitmap_end(vcpu);
   kvm_vcpu_kick(vcpu);
   }

No need for start, end, etc calls into vcpus. All ioapic updater does
is set a request bit, similar to a remote TLB flush. Update is then
entirely done vcpu local.



Re: [PATCH v8 2/3] x86, apicv: add virtual interrupt delivery support

2013-01-08 Thread Marcelo Tosatti
On Tue, Jan 08, 2013 at 12:03:32PM +0200, Gleb Natapov wrote:
 On Mon, Jan 07, 2013 at 07:32:39PM -0200, Marcelo Tosatti wrote:
  On Mon, Jan 07, 2013 at 07:48:43PM +0200, Gleb Natapov wrote:
ioapic_write (or any other ioapic update)
lock()
perform update
make_all_vcpus_request(KVM_REQ_UPDATE_EOI_BITMAP) (*)
unlock()

(*) Similarly to TLB flush.

The advantage is that all work becomes vcpu local. The end result
is much simpler code.
   What complexity will it remove?
  
  Synchronization between multiple CPUs (except the KVM_REQ_ bit
  processing, which is infrastructure shared by other parts of KVM).
  
 Synchronization is just a lock around bitmap access. Can be replaced
 with RCU if it turns to be performance problem.
 
  We agreed that performance is non issue here.
 Yes, if the code is indeed simpler we can take the hit, although
 recalculating bitmap 255 times instead of one for -smp 255 looks like a
 little bit excessive, but I do not see considerable simplification (if
 at all).
 
 So as far as I understand you are proposing:
 
 vcpu0 or io thread:   |vcpu1:
 ioapic_write (or other ioapic update) |
  lock(exitbitmap) |
  if (on vcpu) |
ioapic_update_my_eoi_exitmap() |
  make_all_vcpus_request(update)   |if (update requested)
   |  
 ioapic_update_my_eoi_exitmap()
  unlock(exitbitmap)   |
 
 The current patch logic is this:
 
 vcpu0 or io thread:   |  vcpu1:
 ioapic_write (or other ioapic update) |
  lock(exitbitmap) |
  ioapic_update_all_eoi_exitmaps() |   --- (1)

XXX

  make request on each vcpu| 
  kick each vcpu   | if (update requested)
  unlock(exitbitmap)   |lock(exitbitmap)
   |load_exitbitmap()
   |unlock(exitbitmap)
 
 If I described correctly what you are proposing I do not
 see simplification since the bulk of the complexity is in the
 ioapic_update_(my|all)_eoi_exitmap() and they will be the same in both
 implementations. Actually I do see complication in your idea introduced
 by the fact that the case when update is done from vcpu thread have to
 be handled specially.

The simplification is you don't remotely update the EOI exit bitmaps. 
Its obvious that updating data locally (that is, the context that
updates data is the only user of that data) is simpler.

Yes, ioapic_update_exitmap is the same implementation, but there is no
concern over the state of the remote vcpu. 

For example, you don't have to worry whether code such as this

+static void vmx_update_exitmap_start(struct kvm_vcpu *vcpu)
+{
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+   memset(vmx->eoi_exit_bitmap, 0, 32);
+   spin_lock(&vmx->eoi_bitmap_lock);
+}

is safe.

Because eoi_exit_bitmap is only accessed by the vcpu itself.

Does that make sense?

 The proposed patch may be simplified further by 
 make_all_vcpus_request_async(update)(*)
 instead of making request and kicking each vcpu individually. In fact
 the way it is done now is buggy since requests are made only for vcpus
 with bit set in their bitmask, but if bit is cleared request is not made
 so vcpu can run with stale bitmask.
 
 (*) not exists yet as far as I see.
 
 --
   Gleb.


Re: [PATCH v8 2/3] x86, apicv: add virtual interrupt delivery support

2013-01-08 Thread Marcelo Tosatti
On Tue, Jan 08, 2013 at 12:57:42PM +, Zhang, Yang Z wrote:
 Gleb Natapov wrote on 2013-01-08:
  On Mon, Jan 07, 2013 at 07:32:39PM -0200, Marcelo Tosatti wrote:
  On Mon, Jan 07, 2013 at 07:48:43PM +0200, Gleb Natapov wrote:
  ioapic_write (or any other ioapic update)
  lock()
  perform update
  make_all_vcpus_request(KVM_REQ_UPDATE_EOI_BITMAP) (*)
  unlock()
  
  (*) Similarly to TLB flush.
  
  The advantage is that all work becomes vcpu local. The end result
  is much simpler code.
  What complexity will it remove?
  
  Synchronization between multiple CPUs (except the KVM_REQ_ bit
  processing, which is infrastructure shared by other parts of KVM).
  
  Synchronization is just a lock around bitmap access. Can be replaced
  with RCU if it turns to be performance problem.
  
  We agreed that performance is non issue here.
  Yes, if the code is indeed simpler we can take the hit, although
  recalculating bitmap 255 times instead of one for -smp 255 looks like a
  little bit excessive, but I do not see considerable simplification (if
  at all).
  
  So as far as I understand you are proposing:
  
  vcpu0 or io thread:   |vcpu1:
  ioapic_write (or other ioapic update) |
   lock(exitbitmap) |
   if (on vcpu) |
 ioapic_update_my_eoi_exitmap() |
   make_all_vcpus_request(update)   |if (update requested)
|
  ioapic_update_my_eoi_exitmap()
   unlock(exitbitmap)   |
  The current patch logic is this:
  
  vcpu0 or io thread:   |  vcpu1:
  ioapic_write (or other ioapic update) |
   lock(exitbitmap) |
   ioapic_update_all_eoi_exitmaps() |
   make request on each vcpu|
   kick each vcpu   | if (update requested)
   unlock(exitbitmap)   |lock(exitbitmap)
|load_exitbitmap()
|unlock(exitbitmap)
  If I described correctly what you are proposing I do not
  see simplification since the bulk of the complexity is in the
  ioapic_update_(my|all)_eoi_exitmap() and they will be the same in both
  implementations. Actually I do see complication in your idea introduced
  by the fact that the case when update is done from vcpu thread have to
  be handled specially.
  
  The proposed patch may be simplified further by
  make_all_vcpus_request_async(update)(*) instead of making request and
  kicking each vcpu individually. In fact the way it is done now is buggy
  since requests are made only for vcpus with bit set in their bitmask,
  but if bit is cleared request is not made so vcpu can run with stale
  bitmask.
 ok, how about the follow logic:
 ioapic_write()
 lock()
 clear_eoi_exitmap_on_all_vcpus()
 perform update(no make request)
 make_all_vcpus_request(like tlb flush)
 unlock()

Why not just 

ioapic writer / map updater context
--

ioapic_write()
make_all_vcpus_request()


(no special lock taken)


vcpu context, entry
--

if(check_request(KVM_REQ_, )) {
ioapic_lock();  (*)
update local EOI exit bitmap from IOAPIC
ioapic_unlock();
}



(*) plus any other lock that paths that update the map take
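
For readers skimming the thread, a rough C sketch of the flow being proposed
looks like this; the request name KVM_REQ_EOIBITMAP and the helpers
ioapic_update_eoi_exitmap()/load_eoi_exitmap() are placeholders for
illustration, not the code that was eventually merged:

	/* updater side: any context that touches the IOAPIC redirection table */
	static void ioapic_update(struct kvm_ioapic *ioapic)
	{
		/* ... modify ioapic->redirtbl[] under ioapic->lock ... */
		make_all_vcpus_request(ioapic->kvm, KVM_REQ_EOIBITMAP);
	}

	/* vcpu side, processed on guest entry: all bitmap work is vcpu-local */
	if (kvm_check_request(KVM_REQ_EOIBITMAP, vcpu)) {
		spin_lock(&ioapic->lock);
		ioapic_update_eoi_exitmap(vcpu);	/* rebuild this vcpu's bitmap
							   from the redirection table */
		spin_unlock(&ioapic->lock);
		kvm_x86_ops->load_eoi_exitmap(vcpu);	/* push it into the VMCS */
	}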





 
 Best regards,
 Yang


Re: KVM call agenda for 2013-01-08

2013-01-08 Thread Juan Quintela
Juan Quintela quint...@redhat.com wrote:
 Hi

 Please send in any agenda topics you are interested in.

As there are no agenda topics, there will be no call.

Happy New Year to Everybody!

Later, Juan.


[PATCH 0/1] s390 bug in 3.8-rc

2013-01-08 Thread Christian Borntraeger
Marcelo, Gleb, Frederic,

sorry for spotting this so late. Here is a fix for a bug in kvm on s390
with 3.8-rc.

Due to all the subtle considerations regarding RCU, this is an RFC.

Christian Borntraeger (1):
  s390/kvm: Fix BUG in include/linux/kvm_host.h:745

 arch/s390/kvm/kvm-s390.c | 2 ++
 1 file changed, 2 insertions(+)

-- 
1.7.11.4



[RFC/PATCH 1/1] s390/kvm: Fix BUG in include/linux/kvm_host.h:745

2013-01-08 Thread Christian Borntraeger
commit b080935c8638e08134629d0a9ebdf35669bec14d
kvm: Directly account vtime to system on guest switch

also removed the irq_disable/enable around kvm guest switch, which
is correct in itself. Unfortunately, there is a BUG ON that (correctly)
checks for preemptible to cover the call to rcu later on.
(Introduced with commit 8fa2206821953a50a3a02ea33fcfb3ced2fd9997
KVM: make guest mode entry to be rcu quiescent state)

This check might trigger depending on the kernel config.
Lets make sure that no preemption happens during kvm_guest_enter.
We can enable preemption again after the call to
rcu_virt_note_context_switch returns.

Please note that we continue to run s390 guests with interrupts
enabled.

CC: Frederic Weisbecker fweis...@gmail.com
CC: Gleb Natapov g...@redhat.com
Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
---
 arch/s390/kvm/kvm-s390.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index c9011bf..f090e81 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -613,7 +613,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
kvm_s390_deliver_pending_interrupts(vcpu);
 
	vcpu->arch.sie_block->icptcode = 0;
+   preempt_disable();
	kvm_guest_enter();
+   preempt_enable();
	VCPU_EVENT(vcpu, 6, "entering sie flags %x",
	   atomic_read(&vcpu->arch.sie_block->cpuflags));
trace_kvm_s390_sie_enter(vcpu,
-- 
1.7.11.4



Re: [PATCH v8 2/3] x86, apicv: add virtual interrupt delivery support

2013-01-08 Thread Gleb Natapov
On Tue, Jan 08, 2013 at 11:57:59AM -0200, Marcelo Tosatti wrote:
 On Tue, Jan 08, 2013 at 12:57:42PM +, Zhang, Yang Z wrote:
  Gleb Natapov wrote on 2013-01-08:
   On Mon, Jan 07, 2013 at 07:32:39PM -0200, Marcelo Tosatti wrote:
   On Mon, Jan 07, 2013 at 07:48:43PM +0200, Gleb Natapov wrote:
   ioapic_write (or any other ioapic update)
   lock()
   perform update
   make_all_vcpus_request(KVM_REQ_UPDATE_EOI_BITMAP) (*)
   unlock()
   
   (*) Similarly to TLB flush.
   
   The advantage is that all work becomes vcpu local. The end result
   is much simpler code.
   What complexity will it remove?
   
   Synchronization between multiple CPUs (except the KVM_REQ_ bit
   processing, which is infrastructure shared by other parts of KVM).
   
   Synchronization is just a lock around bitmap access. Can be replaced
   with RCU if it turns to be performance problem.
   
   We agreed that performance is non issue here.
   Yes, if the code is indeed simpler we can take the hit, although
   recalculating bitmap 255 times instead of one for -smp 255 looks like a
   little bit excessive, but I do not see considerable simplification (if
   at all).
   
   So as far as I understand you are proposing:
   
   vcpu0 or io thread:   |vcpu1:
   ioapic_write (or other ioapic update) |
lock(exitbitmap) |
if (on vcpu) |
  ioapic_update_my_eoi_exitmap() |
make_all_vcpus_request(update)   |if (update requested)
 |
   ioapic_update_my_eoi_exitmap()
unlock(exitbitmap)   |
   The current patch logic is this:
   
   vcpu0 or io thread:   |  vcpu1:
   ioapic_write (or other ioapic update) |
lock(exitbitmap) |
ioapic_update_all_eoi_exitmaps() |
make request on each vcpu|
kick each vcpu   | if (update requested)
unlock(exitbitmap)   |lock(exitbitmap)
 |load_exitbitmap()
 |unlock(exitbitmap)
   If I described correctly what you are proposing I do not
   see simplification since the bulk of the complexity is in the
   ioapic_update_(my|all)_eoi_exitmap() and they will be the same in both
   implementations. Actually I do see complication in your idea introduced
   by the fact that the case when update is done from vcpu thread have to
   be handled specially.
   
   The proposed patch may be simplified further by
   make_all_vcpus_request_async(update)(*) instead of making request and
   kicking each vcpu individually. In fact the way it is done now is buggy
   since requests are made only for vcpus with bit set in their bitmask,
   but if bit is cleared request is not made so vcpu can run with stale
   bitmask.
  ok, how about the follow logic:
  ioapic_write()
  lock()
  clear_eoi_exitmap_on_all_vcpus()
  perform update(no make request)
  make_all_vcpus_request(like tlb flush)
  unlock()
 
 Why not just 
 
 ioapic writer / map updater context
 --
 
 ioapic_write()
 make_all_vcpus_request()
 
 
 (no special lock taken)
 
 
 vcpu context, entry
 --
 
   if(check_request(KVM_REQ_, )) {
   ioapic_lock();  (*)
   update local EOI exit bitmap from IOAPIC
   ioapic_unlock();
   }
 
Fine by me. Looks simpler.

 
 
 (*) plus any other lock that paths that update the map take
 
 
 
 
 
  
  Best regards,
  Yang

--
Gleb.


Re: [RFC/PATCH 1/1] s390/kvm: Fix BUG in include/linux/kvm_host.h:745

2013-01-08 Thread Frederic Weisbecker
2013/1/8 Christian Borntraeger borntrae...@de.ibm.com:
 commit b080935c8638e08134629d0a9ebdf35669bec14d
 kvm: Directly account vtime to system on guest switch

 also removed the irq_disable/enable around kvm guest switch, which
 is correct in itself. Unfortunately, there is a BUG ON that (correctly)
 checks for preemptible to cover the call to rcu later on.
 (Introduced with commit 8fa2206821953a50a3a02ea33fcfb3ced2fd9997
 KVM: make guest mode entry to be rcu quiescent state)

 This check might trigger depending on the kernel config.
 Lets make sure that no preemption happens during kvm_guest_enter.
 We can enable preemption again after the call to
 rcu_virt_note_context_switch returns.

 Please note that we continue to run s390 guests with interrupts
 enabled.

 CC: Frederic Weisbecker fweis...@gmail.com
 CC: Gleb Natapov g...@redhat.com
 Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
 ---
  arch/s390/kvm/kvm-s390.c | 2 ++
  1 file changed, 2 insertions(+)

 diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
 index c9011bf..f090e81 100644
 --- a/arch/s390/kvm/kvm-s390.c
 +++ b/arch/s390/kvm/kvm-s390.c
 @@ -613,7 +613,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 kvm_s390_deliver_pending_interrupts(vcpu);

 vcpu->arch.sie_block->icptcode = 0;
 +   preempt_disable();
 kvm_guest_enter();
 +   preempt_enable();

Sorry for the issue.

The fix looks good to me, thanks!

Acked-by: Frederic Weisbecker fweis...@gmail.com


[GIT PULL] KVM fixes for 3.8-rc2

2013-01-08 Thread Marcelo Tosatti

Linus,

Please pull from

git://git.kernel.org/pub/scm/virt/kvm/kvm.git master

To receive the following KVM bug fixes


Alexander Graf (1):
  Merge commit 'origin/master' into kvm-ppc-3.8

Andreas Schwab (1):
  KVM: PPC: Book3S HV: Fix compilation without CONFIG_PPC_POWERNV

Bharat Bhushan (1):
  powerpc: Corrected include header path in kvm_para.h

Gleb Natapov (1):
  Merge branch 'kvm-ppc-3.8' of https://github.com/agraf/linux-2.6 into 
master

Li Zhong (1):
  Add rcu user eqs exception hooks for async page fault

Marcelo Tosatti (1):
  KVM: x86: use dynamic percpu allocations for shared msrs area

 arch/powerpc/include/uapi/asm/kvm_para.h |2 +-
 arch/powerpc/kvm/book3s_hv_ras.c |4 
 arch/x86/kernel/kvm.c|   12 ++--
 arch/x86/kvm/x86.c   |   24 ++--
 4 files changed, 33 insertions(+), 9 deletions(-)


[PATCH v5 00/14] KVM/ARM Implementation

2013-01-08 Thread Christoffer Dall
The following series implements KVM support for ARM processors,
specifically on the Cortex-A15 platform.

Work is done in collaboration between Columbia University, Virtual Open
Systems and ARM/Linaro.

The patch series applies to Linux 3.8-rc2 with kvm/next merged:
 git://git.kernel.org/pub/scm/virt/kvm/kvm.git
branch: next (commit e11ae1a102b)

The series relies on two additional patches in Will Deacon's perf tree:
ARM: Define CPU part numbers and implementors
ARM: Use implementor and part defines from cputype.h

This is Version 15 of the patch series, the first 10 versions were
reviewed on the KVM/ARM and KVM mailing lists. Changes can also be
pulled from:
git://github.com/virtualopensystems/linux-kvm-arm.git
branch: kvm-arm-v15
branch: kvm-arm-v15-vgic
branch: kvm-arm-v15-vgic-timers

A non-flattened edition of the patch series, which can always be merged,
can be found at:
 git://github.com/virtualopensystems/linux-kvm-arm.git kvm-arm-master

This patch series requires QEMU compatibility.  Use the branch
 git://github.com/virtualopensystems/qemu.git kvm-arm

There is also WIP QEMU patches to support virtio on ARM:
 git://github.com/virtualopensystems/qemu.git kvm-arm-virtio

There is also a rebasing WIP branch with support for huge pages:
 git://github.com/virtualopensystems/linux-kvm-arm.git kvm-arm-hugetlb

Finally there is kvmtool support available for the mach-virt machine:
 git://git.kernel.org/pub/scm/linux/kernel/git/will/kvmtool.git

Following this patch series, which implements core KVM support, are two
other patch series implementing Virtual Generic Interrupt Controller
(VGIC) support and Architected Generic Timers.  All three patch series
should be applied for full QEMU compatibility.

The implementation is broken up into a logical set of patches, the first
are preparatory patches:
  1. ARM: Add page table defines for KVM
  2. ARM: Section based HYP idmaps

The main implementation is broken up into separate patches, the first
containing a skeleton of files, makefile changes, the basic user space
interface and KVM architecture specific stubs.  Subsequent patches
implement parts of the system as listed:
  3. Skeleton and reset hooks
  4. Hypervisor initialization
  5. Memory virtualization setup (hyp mode mappings and 2nd stage)
  6. Inject IRQs and FIQs from userspace
  7. World-switch implementation and Hyp exception vectors
  8. Emulation framework and coproc emulation
  9. Coproc user space API
 10. Demux multiplexed coproc registers
 11. User space API to get/set VFP registers
 12. Handle guest user memory aborts
 13. Handle guest MMIO aborts
 14. Add an entry in the MAINTAINERS file

Testing:
 Tested on the Versatile Express TC2 devboard and on the Arndale board,
 running simultaneous VMs, all running SMP, on an SMP host, each
 VM running hackbench and cyclictest and with extreme memory pressure
 applied to the host with swapping enabled to provoke page eviction.
 Also tested KSM merging swapping on the host.  Fully boots both Ubuntu
 (user space Thumb-2) and Debian (user space ARM) guests each of which
 can run a number of workloads like apache, mysql, kernel compile, network
 tests, and more.

For a guide on how to set up a testing environment and try out these
patches, see:
 http://www.virtualopensystems.com/media/pdf/kvm-arm-guide.pdf

Changes since v14:
 - Fixed permission fault handling by correctly retrieving the IPA on
   Stage-2 permission faults
 - Fix compile error when !CONFIG_KVM_ARM_HOST
 - Support building into separate object directory
 - Fixed the Voodoo Bug (see
   https://github.com/virtualopensystems/linux-kvm-arm/wiki/Voodoo-Bug)
 - Improved some tracepoint debugs
 - Improved and cleaned up VTCR and VTTBR initialization
 - Clarified and unified Stage-2 page table clearing
 - Addressed a large number of concerns from Will Deacon's review,
   including fixing a race condition and removing unused exports.
 - Be a little more verbose when something goes wrong during the init
   process.

Changes since v13:
 - Fix VTTBR mask bug
 - Change KVM_MAX_VCPUS to config option (default 4)
 - Go back to struct pt_regs in kvm_regs struct
 - Factor out mmio instruction decoding to a separate file with non
   kvm-specific data structures as the interface.
 - Update kvm_device_address struct to use 64-bit fields
 - Various cleanups and compile fixes

Changes since v12:
 - Documentation updates
 - Change Hyp-ABI to function call based paradigm
 - Cleanup world-switch code
 - Unify HIFAR/HDFAR on the vcpu struct
 - Simplify vcpu register access in software
 - Enforce use of vcpu field accessors
 - Factor out mmio handling into separate file
 - Check for overlaps in mmio address mappings
 - Bugfix in mmio decoding
 - Complete rework of ARM mmio load/store instruction

Changes since v11:
 - Memory setup and page table defines reworked
 - We do not export unused perf bitfields anymore
 - No module support anymore and following cleanup
 - Hide vcpu 

[PATCH v5 01/14] ARM: Add page table and page defines needed by KVM

2013-01-08 Thread Christoffer Dall
KVM uses the stage-2 page tables and the Hyp page table format,
so we define the fields and page protection flags needed by KVM.

The nomenclature is this:
 - page_hyp:PL2 code/data mappings
 - page_hyp_device: PL2 device mappings (vgic access)
 - page_s2: Stage-2 code/data page mappings
 - page_s2_device:  Stage-2 device mappings (vgic access)

Reviewed-by: Marcelo Tosatti mtosa...@redhat.com
Signed-off-by: Christoffer Dall c.d...@virtualopensystems.com
---
 arch/arm/include/asm/pgtable-3level.h |   18 ++
 arch/arm/include/asm/pgtable.h|7 +++
 arch/arm/mm/mmu.c |   22 ++
 3 files changed, 47 insertions(+)

diff --git a/arch/arm/include/asm/pgtable-3level.h 
b/arch/arm/include/asm/pgtable-3level.h
index a3f3792..6ef8afd 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -104,11 +104,29 @@
  */
 #define L_PGD_SWAPPER	(_AT(pgdval_t, 1) << 55)	/* swapper_pg_dir entry */
 
+/*
+ * 2nd stage PTE definitions for LPAE.
+ */
+#define L_PTE_S2_MT_UNCACHED	 (_AT(pteval_t, 0x5) << 2) /* MemAttr[3:0] */
+#define L_PTE_S2_MT_WRITETHROUGH (_AT(pteval_t, 0xa) << 2) /* MemAttr[3:0] */
+#define L_PTE_S2_MT_WRITEBACK	 (_AT(pteval_t, 0xf) << 2) /* MemAttr[3:0] */
+#define L_PTE_S2_RDONLY		 (_AT(pteval_t, 1) << 6)   /* HAP[1]   */
+#define L_PTE_S2_RDWR		 (_AT(pteval_t, 2) << 6)   /* HAP[2:1] */
+
+/*
+ * Hyp-mode PL2 PTE definitions for LPAE.
+ */
+#define L_PTE_HYP  L_PTE_USER
+
 #ifndef __ASSEMBLY__
 
 #define pud_none(pud)  (!pud_val(pud))
 #define pud_bad(pud)   (!(pud_val(pud) & 2))
 #define pud_present(pud)   (pud_val(pud))
+#define pmd_table(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \
+PMD_TYPE_TABLE)
+#define pmd_sect(pmd)  ((pmd_val(pmd) & PMD_TYPE_MASK) == \
+PMD_TYPE_SECT)
 
 #define pud_clear(pudp)\
do {\
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 9c82f98..f30ac3b 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -70,6 +70,9 @@ extern void __pgd_error(const char *file, int line, pgd_t);
 
 extern pgprot_tpgprot_user;
 extern pgprot_tpgprot_kernel;
+extern pgprot_tpgprot_hyp_device;
+extern pgprot_tpgprot_s2;
+extern pgprot_tpgprot_s2_device;
 
 #define _MOD_PROT(p, b)	__pgprot(pgprot_val(p) | (b))
 
@@ -82,6 +85,10 @@ extern pgprot_t  pgprot_kernel;
 #define PAGE_READONLY_EXEC _MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_RDONLY)
 #define PAGE_KERNEL	_MOD_PROT(pgprot_kernel, L_PTE_XN)
 #define PAGE_KERNEL_EXEC   pgprot_kernel
+#define PAGE_HYP   _MOD_PROT(pgprot_kernel, L_PTE_HYP)
+#define PAGE_HYP_DEVICE	_MOD_PROT(pgprot_hyp_device, L_PTE_HYP)
+#define PAGE_S2		_MOD_PROT(pgprot_s2, L_PTE_S2_RDONLY)
+#define PAGE_S2_DEVICE _MOD_PROT(pgprot_s2_device, L_PTE_USER | L_PTE_S2_RDONLY)
 
 #define __PAGE_NONE	__pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN | L_PTE_NONE)
 #define __PAGE_SHARED  __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN)
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 9f06102..1f51d71 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -57,6 +57,9 @@ static unsigned int cachepolicy __initdata = 
CPOLICY_WRITEBACK;
 static unsigned int ecc_mask __initdata = 0;
 pgprot_t pgprot_user;
 pgprot_t pgprot_kernel;
+pgprot_t pgprot_hyp_device;
+pgprot_t pgprot_s2;
+pgprot_t pgprot_s2_device;
 
 EXPORT_SYMBOL(pgprot_user);
 EXPORT_SYMBOL(pgprot_kernel);
@@ -66,34 +69,46 @@ struct cachepolicy {
unsigned intcr_mask;
pmdval_tpmd;
pteval_tpte;
+   pteval_tpte_s2;
 };
 
+#ifdef CONFIG_ARM_LPAE
+#define s2_policy(policy)  policy
+#else
+#define s2_policy(policy)  0
+#endif
+
 static struct cachepolicy cache_policies[] __initdata = {
{
 .policy = "uncached",
 .cr_mask= CR_W|CR_C,
 .pmd= PMD_SECT_UNCACHED,
 .pte= L_PTE_MT_UNCACHED,
+   .pte_s2 = s2_policy(L_PTE_S2_MT_UNCACHED),
 }, {
 .policy = "buffered",
 .cr_mask= CR_C,
 .pmd= PMD_SECT_BUFFERED,
 .pte= L_PTE_MT_BUFFERABLE,
+   .pte_s2 = s2_policy(L_PTE_S2_MT_UNCACHED),
 }, {
 .policy = "writethrough",
 .cr_mask= 0,
 .pmd= PMD_SECT_WT,
 .pte= L_PTE_MT_WRITETHROUGH,
+   .pte_s2 = 

[PATCH v5 02/14] ARM: Section based HYP idmap

2013-01-08 Thread Christoffer Dall
Add a method (hyp_idmap_setup) to populate a hyp pgd with an
identity mapping of the code contained in the .hyp.idmap.text
section.

Offer a method to drop this identity mapping through
hyp_idmap_teardown.

Make all the above depend on CONFIG_ARM_VIRT_EXT and CONFIG_ARM_LPAE.

Cc: Will Deacon will.dea...@arm.com
Reviewed-by: Marcelo Tosatti mtosa...@redhat.com
Signed-off-by: Marc Zyngier marc.zyng...@arm.com
Signed-off-by: Christoffer Dall c.d...@virtualopensystems.com
---
 arch/arm/include/asm/idmap.h|1 +
 arch/arm/include/asm/pgtable-3level-hwdef.h |1 +
 arch/arm/kernel/vmlinux.lds.S   |6 +++
 arch/arm/mm/idmap.c |   54 ++-
 4 files changed, 50 insertions(+), 12 deletions(-)

diff --git a/arch/arm/include/asm/idmap.h b/arch/arm/include/asm/idmap.h
index bf863ed..1a66f907 100644
--- a/arch/arm/include/asm/idmap.h
+++ b/arch/arm/include/asm/idmap.h
@@ -8,6 +8,7 @@
 #define __idmap __section(.idmap.text) noinline notrace
 
 extern pgd_t *idmap_pgd;
+extern pgd_t *hyp_pgd;
 
 void setup_mm_for_reboot(void);
 
diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h 
b/arch/arm/include/asm/pgtable-3level-hwdef.h
index d795282..a2d404e 100644
--- a/arch/arm/include/asm/pgtable-3level-hwdef.h
+++ b/arch/arm/include/asm/pgtable-3level-hwdef.h
@@ -44,6 +44,7 @@
 #define PMD_SECT_XN	(_AT(pmdval_t, 1) << 54)
 #define PMD_SECT_AP_WRITE  (_AT(pmdval_t, 0))
 #define PMD_SECT_AP_READ   (_AT(pmdval_t, 0))
+#define PMD_SECT_AP1   (_AT(pmdval_t, 1) << 6)
 #define PMD_SECT_TEX(x)	(_AT(pmdval_t, 0))
 
 /*
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 11c1785..b571484 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -19,7 +19,11 @@
ALIGN_FUNCTION();   \
VMLINUX_SYMBOL(__idmap_text_start) = .; \
*(.idmap.text)  \
-   VMLINUX_SYMBOL(__idmap_text_end) = .;
+   VMLINUX_SYMBOL(__idmap_text_end) = .;   \
+   ALIGN_FUNCTION();   \
+   VMLINUX_SYMBOL(__hyp_idmap_text_start) = .; \
+   *(.hyp.idmap.text)  \
+   VMLINUX_SYMBOL(__hyp_idmap_text_end) = .;
 
 #ifdef CONFIG_HOTPLUG_CPU
 #define ARM_CPU_DISCARD(x)
diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c
index 99db769..d9213a5 100644
--- a/arch/arm/mm/idmap.c
+++ b/arch/arm/mm/idmap.c
@@ -1,4 +1,6 @@
+#include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/slab.h>
 
 #include <asm/cputype.h>
 #include <asm/idmap.h>
@@ -6,6 +8,7 @@
 #include <asm/pgtable.h>
 #include <asm/sections.h>
 #include <asm/system_info.h>
+#include <asm/virt.h>
 
 pgd_t *idmap_pgd;
 
@@ -59,11 +62,20 @@ static void idmap_add_pud(pgd_t *pgd, unsigned long addr, 
unsigned long end,
} while (pud++, addr = next, addr != end);
 }
 
-static void identity_mapping_add(pgd_t *pgd, unsigned long addr, unsigned long 
end)
+static void identity_mapping_add(pgd_t *pgd, const char *text_start,
+const char *text_end, unsigned long prot)
 {
-   unsigned long prot, next;
+   unsigned long addr, end;
+   unsigned long next;
+
+   addr = virt_to_phys(text_start);
+   end = virt_to_phys(text_end);
+
+   pr_info("Setting up static %sidentity map for 0x%llx - 0x%llx\n",
+   prot ? "HYP " : "",
+   (long long)addr, (long long)end);
+   prot |= PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AF;
 
-   prot = PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AF;
	if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ && !cpu_is_xscale())
prot |= PMD_BIT4;
 
@@ -74,28 +86,48 @@ static void identity_mapping_add(pgd_t *pgd, unsigned long 
addr, unsigned long e
} while (pgd++, addr = next, addr != end);
 }
 
+#ifdef CONFIG_ARM_VIRT_EXT
+pgd_t *hyp_pgd;
+
+extern char  __hyp_idmap_text_start[], __hyp_idmap_text_end[];
+
+static int __init init_static_idmap_hyp(void)
+{
+   hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
+   if (!hyp_pgd)
+   return -ENOMEM;
+
+   identity_mapping_add(hyp_pgd, __hyp_idmap_text_start,
+__hyp_idmap_text_end, PMD_SECT_AP1);
+
+   return 0;
+}
+#else
+static int __init init_static_idmap_hyp(void)
+{
+   return 0;
+}
+#endif
+
 extern char  __idmap_text_start[], __idmap_text_end[];
 
 static int __init init_static_idmap(void)
 {
-   phys_addr_t idmap_start, idmap_end;
+   int ret;
 
idmap_pgd = pgd_alloc(init_mm);
if (!idmap_pgd)
return -ENOMEM;
 
-   /* Add an identity mapping for the physical address of the section. */
-   idmap_start = virt_to_phys((void *)__idmap_text_start);
-   idmap_end = 

[PATCH v5 03/14] KVM: ARM: Initial skeleton to compile KVM support

2013-01-08 Thread Christoffer Dall
Targets KVM support for Cortex A-15 processors.

Contains all the framework components, make files, header files, some
tracing functionality, and basic user space API.

Only supported core is Cortex-A15 for now.

Most functionality is in arch/arm/kvm/* or arch/arm/include/asm/kvm_*.h.

Reviewed-by: Marcelo Tosatti mtosa...@redhat.com
Signed-off-by: Rusty Russell rusty.russ...@linaro.org
Signed-off-by: Marc Zyngier marc.zyng...@arm.com
Signed-off-by: Christoffer Dall c.d...@virtualopensystems.com
---
 Documentation/virtual/kvm/api.txt  |   57 +-
 arch/arm/Kconfig   |2 
 arch/arm/Makefile  |1 
 arch/arm/include/asm/kvm_arm.h |   24 ++
 arch/arm/include/asm/kvm_asm.h |   58 ++
 arch/arm/include/asm/kvm_coproc.h  |   24 ++
 arch/arm/include/asm/kvm_emulate.h |   50 +
 arch/arm/include/asm/kvm_host.h|  114 
 arch/arm/include/uapi/asm/kvm.h|  106 +++
 arch/arm/kvm/Kconfig   |   55 ++
 arch/arm/kvm/Makefile  |   21 ++
 arch/arm/kvm/arm.c |  355 
 arch/arm/kvm/coproc.c  |   23 ++
 arch/arm/kvm/emulate.c |  155 
 arch/arm/kvm/guest.c   |  221 ++
 arch/arm/kvm/init.S|   19 ++
 arch/arm/kvm/interrupts.S  |   19 ++
 arch/arm/kvm/mmu.c |   17 ++
 arch/arm/kvm/reset.c   |   74 
 arch/arm/kvm/trace.h   |   52 +
 include/uapi/linux/kvm.h   |7 +
 21 files changed, 1450 insertions(+), 4 deletions(-)
 create mode 100644 arch/arm/include/asm/kvm_arm.h
 create mode 100644 arch/arm/include/asm/kvm_asm.h
 create mode 100644 arch/arm/include/asm/kvm_coproc.h
 create mode 100644 arch/arm/include/asm/kvm_emulate.h
 create mode 100644 arch/arm/include/asm/kvm_host.h
 create mode 100644 arch/arm/include/uapi/asm/kvm.h
 create mode 100644 arch/arm/kvm/Kconfig
 create mode 100644 arch/arm/kvm/Makefile
 create mode 100644 arch/arm/kvm/arm.c
 create mode 100644 arch/arm/kvm/coproc.c
 create mode 100644 arch/arm/kvm/emulate.c
 create mode 100644 arch/arm/kvm/guest.c
 create mode 100644 arch/arm/kvm/init.S
 create mode 100644 arch/arm/kvm/interrupts.S
 create mode 100644 arch/arm/kvm/mmu.c
 create mode 100644 arch/arm/kvm/reset.c
 create mode 100644 arch/arm/kvm/trace.h

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index a4df553..4237c27 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -293,7 +293,7 @@ kvm_run' (see below).
 4.11 KVM_GET_REGS
 
 Capability: basic
-Architectures: all
+Architectures: all except ARM
 Type: vcpu ioctl
 Parameters: struct kvm_regs (out)
 Returns: 0 on success, -1 on error
@@ -314,7 +314,7 @@ struct kvm_regs {
 4.12 KVM_SET_REGS
 
 Capability: basic
-Architectures: all
+Architectures: all except ARM
 Type: vcpu ioctl
 Parameters: struct kvm_regs (in)
 Returns: 0 on success, -1 on error
@@ -600,7 +600,7 @@ struct kvm_fpu {
 4.24 KVM_CREATE_IRQCHIP
 
 Capability: KVM_CAP_IRQCHIP
-Architectures: x86, ia64
+Architectures: x86, ia64, ARM
 Type: vm ioctl
 Parameters: none
 Returns: 0 on success, -1 on error
@@ -608,7 +608,8 @@ Returns: 0 on success, -1 on error
 Creates an interrupt controller model in the kernel.  On x86, creates a virtual
 ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a
 local APIC.  IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23
-only go to the IOAPIC.  On ia64, a IOSAPIC is created.
+only go to the IOAPIC.  On ia64, a IOSAPIC is created. On ARM, a GIC is
+created.
 
 
 4.25 KVM_IRQ_LINE
@@ -1775,6 +1776,14 @@ registers, find a list below:
   PPC   | KVM_REG_PPC_VPA_DTL   | 128
   PPC   | KVM_REG_PPC_EPCR | 32
 
+ARM registers are mapped using the lower 32 bits.  The upper 16 of that
+is the register group type, or coprocessor number:
+
+ARM core registers have the following id bit patterns:
+  0x4002  0010 <index into the kvm_regs struct:16>
+
+
+
 4.69 KVM_GET_ONE_REG
 
 Capability: KVM_CAP_ONE_REG
@@ -2127,6 +2136,46 @@ written, then `n_invalid' invalid entries, invalidating 
any previously
 valid entries found.
 
 
+4.77 KVM_ARM_VCPU_INIT
+
+Capability: basic
+Architectures: arm
+Type: vcpu ioctl
+Parameters: struct kvm_vcpu_init (in)
+Returns: 0 on success; -1 on error
+Errors:
+  EINVAL:    the target is unknown, or the combination of features is invalid.
+  ENOENT:    a features bit specified is unknown.
+
+This tells KVM what type of CPU to present to the guest, and what
+optional features it should have.  This will cause a reset of the cpu
+registers to their initial values.  If this is not called, KVM_RUN will
+return ENOEXEC for that vcpu.
+
+Note that because some registers reflect machine topology, all vcpus
+should be created before this ioctl is invoked.
+
+
+4.78 KVM_GET_REG_LIST
+
+Capability: basic
+Architectures: arm

[PATCH v5 05/14] KVM: ARM: Memory virtualization setup

2013-01-08 Thread Christoffer Dall
This commit introduces the framework for guest memory management
through the use of 2nd stage translation. Each VM has a pointer
to a level-1 table (the pgd field in struct kvm_arch) which is
used for the 2nd stage translations. Entries are added when handling
guest faults (later patch) and the table itself can be allocated and
freed through the following functions implemented in
arch/arm/kvm/arm_mmu.c:
 - kvm_alloc_stage2_pgd(struct kvm *kvm);
 - kvm_free_stage2_pgd(struct kvm *kvm);

Each entry in TLBs and caches are tagged with a VMID identifier in
addition to ASIDs. The VMIDs are assigned consecutively to VMs in the
order that VMs are executed, and caches and tlbs are invalidated when
the VMID space has been used to allow for more than 255 simultaneously
running guests.
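
As a rough illustration of the scheme above (not the actual arch/arm/kvm
code -- the field and helper names here, including flush_all_tlbs(), are made
up for the sketch), VMID reuse can be thought of as a generation counter:
when the 8-bit VMID space wraps, the generation is bumped, TLBs are flushed,
and each VM picks up a fresh VMID the next time it runs:

	#define VMID_BITS	8
	#define VMID_MASK	((1UL << VMID_BITS) - 1)

	static u64 vmid_gen = 1;	/* generation of the current VMID space */
	static u32 next_vmid = 1;	/* VMID 0 is reserved for the host */

	/* called, with a global vmid lock held, before entering the guest */
	static void update_vttbr(struct kvm *kvm)
	{
		if (kvm->arch.vmid_gen == vmid_gen)
			return;			/* VMID still valid in this generation */

		if (next_vmid > VMID_MASK) {	/* space exhausted: new generation */
			vmid_gen++;
			next_vmid = 1;
			flush_all_tlbs();	/* stale VMID tags may now be reused */
		}

		kvm->arch.vmid = next_vmid++;
		kvm->arch.vmid_gen = vmid_gen;
		kvm->arch.vttbr = virt_to_phys(kvm->arch.pgd) |
				  ((u64)kvm->arch.vmid << 48);	/* VMID in VTTBR[55:48] */
	}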

The 2nd stage pgd is allocated in kvm_arch_init_vm(). The table is
freed in kvm_arch_destroy_vm(). Both functions are called from the main
KVM code.

We pre-allocate page table memory to be able to synchronize using a
spinlock and be called under rcu_read_lock from the MMU notifiers.  We
steal the mmu_memory_cache implementation from x86 and adapt for our
specific usage.
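
The borrowed cache pattern is roughly this (a simplified sketch, not the
exact code in this patch): objects are pre-allocated with GFP_KERNEL before
mmu_lock is taken, and then handed out from the cache in contexts where
sleeping is not allowed:

	struct kvm_mmu_memory_cache {
		int nobjs;
		void *objects[KVM_NR_MEM_OBJS];
	};

	/* called from the fault path before taking mmu_lock */
	static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, int min)
	{
		while (cache->nobjs < min) {
			void *page = (void *)__get_free_page(GFP_KERNEL);

			if (!page)
				return -ENOMEM;
			cache->objects[cache->nobjs++] = page;
		}
		return 0;
	}

	/* called under mmu_lock / rcu_read_lock, never sleeps */
	static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *cache)
	{
		BUG_ON(!cache->nobjs);
		return cache->objects[--cache->nobjs];
	}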

We support MMU notifiers (thanks to Marc Zyngier) through
kvm_unmap_hva and kvm_set_spte_hva.

Finally, define kvm_phys_addr_ioremap() to map a device at a guest IPA,
which is used by VGIC support to map the virtual CPU interface registers
to the guest. This support is added by Marc Zyngier.

Reviewed-by: Marcelo Tosatti mtosa...@redhat.com
Signed-off-by: Marc Zyngier marc.zyng...@arm.com
Signed-off-by: Christoffer Dall c.d...@virtualopensystems.com
---
 arch/arm/include/asm/kvm_asm.h  |2 
 arch/arm/include/asm/kvm_host.h |   19 ++
 arch/arm/include/asm/kvm_mmu.h  |9 +
 arch/arm/kvm/Kconfig|1 
 arch/arm/kvm/arm.c  |   37 
 arch/arm/kvm/interrupts.S   |   10 +
 arch/arm/kvm/mmu.c  |  370 +++
 arch/arm/kvm/trace.h|   46 +
 8 files changed, 492 insertions(+), 2 deletions(-)

diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 81324e2..f6652f6 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -57,6 +57,7 @@
 #define ARM_EXCEPTION_HVC	7
 
 #ifndef __ASSEMBLY__
+struct kvm;
 struct kvm_vcpu;
 
 extern char __kvm_hyp_init[];
@@ -71,6 +72,7 @@ extern char __kvm_hyp_code_start[];
 extern char __kvm_hyp_code_end[];
 
 extern void __kvm_flush_vm_context(void);
+extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 #endif
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 92e89f3..1de6f0d 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -111,4 +111,23 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 
__user *indices);
 struct kvm_one_reg;
 int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
+u64 kvm_call_hyp(void *hypfn, ...);
+
+#define KVM_ARCH_WANT_MMU_NOTIFIER
+struct kvm;
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+int kvm_unmap_hva_range(struct kvm *kvm,
+   unsigned long start, unsigned long end);
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+
+/* We do not have shadow page tables, hence the empty hooks */
+static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+   return 0;
+}
+
+static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+   return 0;
+}
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index e8679b3..499e7b0 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -23,6 +23,15 @@ int create_hyp_mappings(void *from, void *to);
 int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
 void free_hyp_pmds(void);
 
+int kvm_alloc_stage2_pgd(struct kvm *kvm);
+void kvm_free_stage2_pgd(struct kvm *kvm);
+int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
+ phys_addr_t pa, unsigned long size);
+
+int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run);
+
+void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
+
 phys_addr_t kvm_mmu_get_httbr(void);
 int kvm_mmu_init(void);
 void kvm_clear_hyp_idmap(void);
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 4a01b6f..05227cb 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -36,6 +36,7 @@ config KVM_ARM_HOST
bool KVM host support for ARM cpus.
depends on KVM
depends on MMU
+   select  MMU_NOTIFIER
---help---
  Provides host support for ARM processors.
 
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 2dddc58..ab82039 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -81,12 +81,33 

[PATCH v5 06/14] KVM: ARM: Inject IRQs and FIQs from userspace

2013-01-08 Thread Christoffer Dall
From: Christoffer Dall cd...@cs.columbia.edu

All interrupt injection is now based on the VM ioctl KVM_IRQ_LINE.  This
works semantically well for the GIC as we in fact raise/lower a line on
a machine component (the gic).  The IOCTL uses the following struct.

struct kvm_irq_level {
union {
__u32 irq; /* GSI */
__s32 status;  /* not used for KVM_IRQ_LEVEL */
};
__u32 level;   /* 0 or 1 */
};

ARM can signal an interrupt either at the CPU level, or at the in-kernel irqchip
(GIC), and for in-kernel irqchip can tell the GIC to use PPIs designated for
specific cpus.  The irq field is interpreted like this:

  bits:  | 31 ... 24 | 23  ... 16 | 15...0 |
  field: | irq_type  | vcpu_index |   irq_number   |

The irq_type field has the following values:
- irq_type[0]: out-of-kernel GIC: irq_number 0 is IRQ, irq_number 1 is FIQ
- irq_type[1]: in-kernel GIC: SPI, irq_number between 32 and 1019 (incl.)
   (the vcpu_index field is ignored)
- irq_type[2]: in-kernel GIC: PPI, irq_number between 16 and 31 (incl.)

The irq_number thus corresponds to the IRQ ID as in the GICv2 specs.

This is documented in Documentation/kvm/api.txt.
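
For example, from userspace a level change on an in-kernel GIC SPI is packed
and injected roughly like this (the KVM_ARM_IRQ_* shifts come from the uapi
header added below and are assumed to be visible via the installed headers;
vm_fd is assumed to be the VM file descriptor):

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* irq_type == 1: in-kernel GIC SPI, vcpu_index is ignored */
	static int kvm_arm_set_spi(int vm_fd, uint32_t irq_number, int level)
	{
		struct kvm_irq_level irq = {
			.irq   = (1U << KVM_ARM_IRQ_TYPE_SHIFT) |	/* SPI */
				 (irq_number << KVM_ARM_IRQ_NUM_SHIFT),	/* 32..1019 incl. */
			.level = level,					/* 1 raises, 0 lowers */
		};

		return ioctl(vm_fd, KVM_IRQ_LINE, &irq);
	}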

Reviewed-by: Marcelo Tosatti mtosa...@redhat.com
Signed-off-by: Christoffer Dall c.d...@virtualopensystems.com
---
 Documentation/virtual/kvm/api.txt |   25 --
 arch/arm/include/asm/kvm_arm.h|1 +
 arch/arm/include/uapi/asm/kvm.h   |   21 
 arch/arm/kvm/arm.c|   65 +
 arch/arm/kvm/trace.h  |   25 ++
 include/uapi/linux/kvm.h  |1 +
 6 files changed, 134 insertions(+), 4 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index 4237c27..5050492 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -615,15 +615,32 @@ created.
 4.25 KVM_IRQ_LINE
 
 Capability: KVM_CAP_IRQCHIP
-Architectures: x86, ia64
+Architectures: x86, ia64, arm
 Type: vm ioctl
 Parameters: struct kvm_irq_level
 Returns: 0 on success, -1 on error
 
 Sets the level of a GSI input to the interrupt controller model in the kernel.
-Requires that an interrupt controller model has been previously created with
-KVM_CREATE_IRQCHIP.  Note that edge-triggered interrupts require the level
-to be set to 1 and then back to 0.
+On some architectures it is required that an interrupt controller model has
+been previously created with KVM_CREATE_IRQCHIP.  Note that edge-triggered
+interrupts require the level to be set to 1 and then back to 0.
+
+ARM can signal an interrupt either at the CPU level, or at the in-kernel 
irqchip
+(GIC), and for in-kernel irqchip can tell the GIC to use PPIs designated for
+specific cpus.  The irq field is interpreted like this:
+
+  bits:  | 31 ... 24 | 23  ... 16 | 15...0 |
+  field: | irq_type  | vcpu_index | irq_id |
+
+The irq_type field has the following values:
+- irq_type[0]: out-of-kernel GIC: irq_id 0 is IRQ, irq_id 1 is FIQ
+- irq_type[1]: in-kernel GIC: SPI, irq_id between 32 and 1019 (incl.)
+   (the vcpu_index field is ignored)
+- irq_type[2]: in-kernel GIC: PPI, irq_id between 16 and 31 (incl.)
+
+(The irq_id field thus corresponds nicely to the IRQ ID in the ARM GIC specs)
+
+In both cases, level is used to raise/lower the line.
 
 struct kvm_irq_level {
union {
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index 613afe2..fb22ee8 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -68,6 +68,7 @@
 #define HCR_GUEST_MASK (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \
HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \
HCR_SWIO | HCR_TIDCP)
+#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
 
 /* Hyp System Control Register (HSCTLR) bits */
 #define HSCTLR_TE  (1 << 30)
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index c6298b1..4cf6d8f 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -23,6 +23,7 @@
 #include <asm/ptrace.h>
 
 #define __KVM_HAVE_GUEST_DEBUG
+#define __KVM_HAVE_IRQ_LINE
 
 #define KVM_REG_SIZE(id)   \
	(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
@@ -103,4 +104,24 @@ struct kvm_arch_memory_slot {
 #define KVM_REG_ARM_CORE   (0x0010 << KVM_REG_ARM_COPROC_SHIFT)
 #define KVM_REG_ARM_CORE_REG(name) (offsetof(struct kvm_regs, name) / 4)
 
+/* KVM_IRQ_LINE irq field index values */
+#define KVM_ARM_IRQ_TYPE_SHIFT 24
+#define KVM_ARM_IRQ_TYPE_MASK  0xff
+#define KVM_ARM_IRQ_VCPU_SHIFT 16
+#define KVM_ARM_IRQ_VCPU_MASK  0xff
+#define KVM_ARM_IRQ_NUM_SHIFT  0
+#define KVM_ARM_IRQ_NUM_MASK   0xffff
+
+/* irq_type field */
+#define KVM_ARM_IRQ_TYPE_CPU   0

[PATCH v5 07/14] KVM: ARM: World-switch implementation

2013-01-08 Thread Christoffer Dall
Provides complete world-switch implementation to switch to other guests
running in non-secure modes. Includes Hyp exception handlers that
capture necessary exception information and stores the information on
the VCPU and KVM structures.

The following Hyp-ABI is also documented in the code:

Hyp-ABI: Calling HYP-mode functions from host (in SVC mode):
   Switching to Hyp mode is done through a simple HVC #0 instruction. The
   exception vector code will check that the HVC comes from VMID==0 and if
   so will push the necessary state (SPSR, lr_usr) on the Hyp stack.
   - r0 contains a pointer to a HYP function
   - r1, r2, and r3 contain arguments to the above function.
   - The HYP function will be called with its arguments in r0, r1 and r2.
   On HYP function return, we return directly to SVC.

A call to a function executing in Hyp mode is performed like the following:

svc code
ldr r0, =BSYM(my_hyp_fn)
ldr r1, =my_param
hvc #0  ; Call my_hyp_fn(my_param) from HYP mode
svc code
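
From C the same ABI is reached through the variadic kvm_call_hyp() helper
declared in kvm_host.h, so a host-side call into one of the __kvm_* Hyp
functions looks roughly like this (illustrative fragment, error handling
omitted):

	kvm_call_hyp(__kvm_flush_vm_context);		/* no-argument variant */
	kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);	/* one argument, passed in r1 */
	ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);	/* the world-switch itself */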

Otherwise, the world-switch is pretty straight-forward. All state that
can be modified by the guest is first backed up on the Hyp stack and the
VCPU values are loaded onto the hardware. State which is not loaded, but
theoretically modifiable by the guest, is protected through the
virtualization features to generate a trap and cause software emulation.
Upon guest return, all state is restored from hardware onto the VCPU
struct and the original state is restored from the Hyp-stack onto the
hardware.

SMP support using the VMPIDR calculated on the basis of the host MPIDR
and overriding the low bits with KVM vcpu_id contributed by Marc Zyngier.

Reuse of VMIDs has been implemented by Antonios Motakis and adapted from
a separate patch into the appropriate patches introducing the
functionality. Note that the VMIDs are stored per VM as required by the ARM
architecture reference manual.

To support VFP/NEON we trap those instructions using the HPCTR. When
we trap, we switch the FPU.  After a guest exit, the VFP state is
returned to the host.  When disabling access to floating point
instructions, we also mask FPEXC_EN in order to avoid the guest
receiving Undefined instruction exceptions before we have a chance to
switch back the floating point state.  We are reusing vfp_hard_struct,
so we depend on VFPv3 being enabled in the host kernel, if not, we still
trap cp10 and cp11 in order to inject an undefined instruction exception
whenever the guest tries to use VFP/NEON. VFP/NEON support was developed by
Antonios Motakis and Rusty Russell.

Aborts that are permission faults, and not stage-1 page table walk, do
not report the faulting address in the HPFAR.  We have to resolve the
IPA, and store it just like the HPFAR register on the VCPU struct. If
the IPA cannot be resolved, it means another CPU is playing with the
page tables, and we simply restart the guest.  This quirk was fixed by
Marc Zyngier.

Reviewed-by: Marcelo Tosatti mtosa...@redhat.com
Signed-off-by: Rusty Russell rusty.russ...@linaro.org
Signed-off-by: Antonios Motakis a.mota...@virtualopensystems.com
Signed-off-by: Marc Zyngier marc.zyng...@arm.com
Signed-off-by: Christoffer Dall c.d...@virtualopensystems.com
---
 arch/arm/include/asm/kvm_arm.h  |   51 
 arch/arm/include/asm/kvm_host.h |   10 +
 arch/arm/kernel/asm-offsets.c   |   25 ++
 arch/arm/kvm/arm.c  |  187 
 arch/arm/kvm/interrupts.S   |  396 +++
 arch/arm/kvm/interrupts_head.S  |  443 +++
 6 files changed, 1108 insertions(+), 4 deletions(-)
 create mode 100644 arch/arm/kvm/interrupts_head.S

diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index fb22ee8..a3262a2 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -98,6 +98,18 @@
 #define TTBCR_T0SZ 3
 #define HTCR_MASK  (TTBCR_T0SZ | TTBCR_IRGN0 | TTBCR_ORGN0 | TTBCR_SH0)
 
+/* Hyp System Trap Register */
+#define HSTR_T(x)  (1 << x)
+#define HSTR_TTEE  (1 << 16)
+#define HSTR_TJDBX (1 << 17)
+
+/* Hyp Coprocessor Trap Register */
+#define HCPTR_TCP(x)   (1 << x)
+#define HCPTR_TCP_MASK (0x3fff)
+#define HCPTR_TASE (1 << 15)
+#define HCPTR_TTA  (1 << 20)
+#define HCPTR_TCPAC	(1 << 31)
+
 /* Hyp Debug Configuration Register bits */
 #define HDCR_TDRA  (1 << 11)
 #define HDCR_TDOSA (1 << 10)
@@ -144,6 +156,45 @@
 #else
 #define VTTBR_X	   (5 - KVM_T0SZ)
 #endif
+#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
+#define VTTBR_BADDR_MASK  (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_VMID_SHIFT  (48LLU)
+#define VTTBR_VMID_MASK  (0xffLLU << VTTBR_VMID_SHIFT)
+
+/* Hyp Syndrome Register (HSR) bits */
+#define HSR_EC_SHIFT   (26)
+#define HSR_EC (0x3fU << HSR_EC_SHIFT)
+#define HSR_IL (1U << 25)
+#define HSR_ISS	   (HSR_IL - 1)
+#define HSR_ISV_SHIFT  (24)
+#define HSR_ISV(1U  

[PATCH v5 08/14] KVM: ARM: Emulation framework and CP15 emulation

2013-01-08 Thread Christoffer Dall
Adds a new important function in the main KVM/ARM code called
handle_exit() which is called from kvm_arch_vcpu_ioctl_run() on returns
from guest execution. This function examines the Hyp-Syndrome-Register
(HSR), which contains information telling KVM what caused the exit from
the guest.

Some of the reasons for an exit are CP15 accesses, which are
not allowed from the guest and this commit handles these exits by
emulating the intended operation in software and skipping the guest
instruction.
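
The dispatch itself is driven by the exception class field of the HSR; a
simplified sketch follows (the handler table, the hsr field name and the
wrapper are illustrative, while HSR_EC/HSR_EC_SHIFT are the definitions
added in the world-switch patch):

	typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);

	static exit_handle_fn arm_exit_handlers[0x40];	/* indexed by EC, filled elsewhere */

	static int handle_trap_exit(struct kvm_vcpu *vcpu, struct kvm_run *run)
	{
		u8 hsr_ec = (vcpu->arch.hsr & HSR_EC) >> HSR_EC_SHIFT;

		/* e.g. the EC for a 32-bit CP15 access dispatches to kvm_handle_cp15_32() */
		if (hsr_ec < ARRAY_SIZE(arm_exit_handlers) && arm_exit_handlers[hsr_ec])
			return arm_exit_handlers[hsr_ec](vcpu, run);

		kvm_inject_undefined(vcpu);	/* unknown trap: give the guest an undef */
		return 1;			/* 1 == resume the guest */
	}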

Minor notes about the coproc register reset:
1) We reserve a value of 0 as an invalid cp15 offset, to catch bugs in our
   table, at cost of 4 bytes per vcpu.

2) Added comments on the table indicating how we handle each register, for
   simplicity of understanding.

Reviewed-by: Marcelo Tosatti mtosa...@redhat.com
Signed-off-by: Rusty Russell rusty.russ...@linaro.org
Signed-off-by: Christoffer Dall c.d...@virtualopensystems.com
---
 arch/arm/include/asm/kvm_arm.h |9 +
 arch/arm/include/asm/kvm_coproc.h  |   14 +
 arch/arm/include/asm/kvm_emulate.h |6 +
 arch/arm/include/asm/kvm_host.h|4 
 arch/arm/kvm/Makefile  |2 
 arch/arm/kvm/arm.c |  175 +-
 arch/arm/kvm/coproc.c  |  360 
 arch/arm/kvm/coproc.h  |  153 +++
 arch/arm/kvm/coproc_a15.c  |  162 
 arch/arm/kvm/emulate.c |  218 ++
 arch/arm/kvm/trace.h   |   45 +
 11 files changed, 1144 insertions(+), 4 deletions(-)
 create mode 100644 arch/arm/kvm/coproc.h
 create mode 100644 arch/arm/kvm/coproc_a15.c

diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index a3262a2..3ff6f22 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -70,6 +70,11 @@
HCR_SWIO | HCR_TIDCP)
 #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
 
+/* System Control Register (SCTLR) bits */
+#define SCTLR_TE   (1 << 30)
+#define SCTLR_EE   (1 << 25)
+#define SCTLR_V	   (1 << 13)
+
 /* Hyp System Control Register (HSCTLR) bits */
 #define HSCTLR_TE  (1 << 30)
 #define HSCTLR_EE  (1 << 25)
@@ -171,6 +176,10 @@
 #define HSR_FSC	   (0x3f)
 #define HSR_FSC_TYPE   (0x3c)
 #define HSR_WNR	   (1 << 6)
+#define HSR_CV_SHIFT   (24)
+#define HSR_CV (1U << HSR_CV_SHIFT)
+#define HSR_COND_SHIFT (20)
+#define HSR_COND   (0xfU << HSR_COND_SHIFT)
 
 #define FSC_FAULT  (0x04)
 #define FSC_PERM   (0x0c)
diff --git a/arch/arm/include/asm/kvm_coproc.h 
b/arch/arm/include/asm/kvm_coproc.h
index b6d023d..bd1ace0 100644
--- a/arch/arm/include/asm/kvm_coproc.h
+++ b/arch/arm/include/asm/kvm_coproc.h
@@ -21,4 +21,18 @@
 
 void kvm_reset_coprocs(struct kvm_vcpu *vcpu);
 
+struct kvm_coproc_target_table {
+   unsigned target;
+   const struct coproc_reg *table;
+   size_t num;
+};
+void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table);
+
+int kvm_handle_cp10_id(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp_0_13_access(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
+void kvm_coproc_table_init(void);
 #endif /* __ARM_KVM_COPROC_H__ */
diff --git a/arch/arm/include/asm/kvm_emulate.h 
b/arch/arm/include/asm/kvm_emulate.h
index 17dad67..01a755b 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -25,6 +25,12 @@
 u32 *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num);
 u32 *vcpu_spsr(struct kvm_vcpu *vcpu);
 
+int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run);
+void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr);
+void kvm_inject_undefined(struct kvm_vcpu *vcpu);
+void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
+void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
+
 static inline u32 *vcpu_pc(struct kvm_vcpu *vcpu)
 {
return (u32 *)vcpu-arch.regs.usr_regs.ARM_pc;
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index ddb09da..a56a319 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -94,6 +94,10 @@ struct kvm_vcpu_arch {
 * Anything that is not used directly from assembly code goes
 * here.
 */
+   /* dcache set/way operation pending */
+   int last_pcpu;
+   cpumask_t require_dcache_flush;
+
/* Interrupt related fields */
u32 irq_lines;  /* IRQ and FIQ levels */
 
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index dfc293f..88edce6 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -18,4 +18,4 @@ kvm-arm-y 

[PATCH v5 09/14] KVM: ARM: User space API for getting/setting co-proc registers

2013-01-08 Thread Christoffer Dall
The following three ioctls are implemented:
 -  KVM_GET_REG_LIST
 -  KVM_GET_ONE_REG
 -  KVM_SET_ONE_REG

Now we have a table for all the cp15 registers, we can drive a generic
API.

The register IDs carry the following encoding:

ARM registers are mapped using the lower 32 bits.  The upper 16 of that
is the register group type, or coprocessor number:

ARM 32-bit CP15 registers have the following id bit patterns:
  0x4002  000F zero:1 crn:4 crm:4 opc1:4 opc2:3

ARM 64-bit CP15 registers have the following id bit patterns:
  0x4003  000F zero:1 zero:4 crm:4 opc1:4 zero:3

For futureproofing, we need to tell QEMU about the CP15 registers the
host lets the guest access.

It will need this information to restore a current guest on a future
CPU, or perhaps on a future KVM which allows some of these registers to
be changed.

We use a separate table for these, as they're only for the userspace API.

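A user space client would drive the API roughly like the sketch below
(illustrative only, not QEMU code; vcpu_fd is an already-open KVM vcpu
file descriptor):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Enumerate every exposed register id, then read each one back. */
static void dump_vcpu_regs(int vcpu_fd)
{
	struct kvm_reg_list probe = { .n = 0 };
	struct kvm_reg_list *list;
	uint64_t i;

	/* First call with n == 0 fails with E2BIG but fills in the count. */
	ioctl(vcpu_fd, KVM_GET_REG_LIST, &probe);

	list = malloc(sizeof(*list) + probe.n * sizeof(__u64));
	if (!list)
		return;
	list->n = probe.n;
	if (ioctl(vcpu_fd, KVM_GET_REG_LIST, list) < 0)
		goto out;

	for (i = 0; i < list->n; i++) {
		uint64_t val = 0;	/* big enough for U32 and U64 registers */
		struct kvm_one_reg reg = {
			.id   = list->reg[i],
			.addr = (uint64_t)(unsigned long)&val,
		};

		if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) == 0)
			printf("reg %016llx = %llx\n",
			       (unsigned long long)list->reg[i],
			       (unsigned long long)val);
	}
out:
	free(list);
}
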
Reviewed-by: Marcelo Tosatti mtosa...@redhat.com
Signed-off-by: Rusty Russell rusty.russ...@linaro.org
Signed-off-by: Christoffer Dall c.d...@virtualopensystems.com
---
 Documentation/virtual/kvm/api.txt |5 +
 arch/arm/include/asm/kvm_coproc.h |9 +
 arch/arm/include/asm/kvm_host.h   |4 
 arch/arm/kvm/coproc.c |  327 +
 arch/arm/kvm/guest.c  |9 +
 5 files changed, 350 insertions(+), 4 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index 5050492..0e22874 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1799,6 +1799,11 @@ is the register group type, or coprocessor number:
 ARM core registers have the following id bit patterns:
   0x4002  0010 index into the kvm_regs struct:16
 
+ARM 32-bit CP15 registers have the following id bit patterns:
+  0x4002  000F zero:1 crn:4 crm:4 opc1:4 opc2:3
+
+ARM 64-bit CP15 registers have the following id bit patterns:
+  0x4003  000F zero:1 zero:4 crm:4 opc1:4 zero:3
 
 
 4.69 KVM_GET_ONE_REG
diff --git a/arch/arm/include/asm/kvm_coproc.h 
b/arch/arm/include/asm/kvm_coproc.h
index bd1ace0..4917c2f 100644
--- a/arch/arm/include/asm/kvm_coproc.h
+++ b/arch/arm/include/asm/kvm_coproc.h
@@ -34,5 +34,14 @@ int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct 
kvm_run *run);
 int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
+
+unsigned long kvm_arm_num_guest_msrs(struct kvm_vcpu *vcpu);
+int kvm_arm_copy_msrindices(struct kvm_vcpu *vcpu, u64 __user *uindices);
 void kvm_coproc_table_init(void);
+
+struct kvm_one_reg;
+int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
+int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
+int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
+unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu);
 #endif /* __ARM_KVM_COPROC_H__ */
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index a56a319..6cc8933 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -27,6 +27,7 @@
 #define KVM_USER_MEM_SLOTS 32
 #define KVM_PRIVATE_MEM_SLOTS 4
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+#define KVM_HAVE_ONE_REG
 
 #define KVM_VCPU_MAX_FEATURES 0
 
@@ -134,6 +135,9 @@ int kvm_unmap_hva_range(struct kvm *kvm,
unsigned long start, unsigned long end);
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 
+unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
+int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
+
 /* We do not have shadow page tables, hence the empty hooks */
 static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva)
 {
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 722efe3..95a0f5e 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -18,6 +18,7 @@
  */
 #include linux/mm.h
 #include linux/kvm_host.h
+#include linux/uaccess.h
 #include asm/kvm_arm.h
 #include asm/kvm_host.h
 #include asm/kvm_emulate.h
@@ -347,6 +348,328 @@ int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
return emulate_cp15(vcpu, params);
 }
 
+/**
+ * Userspace API
+ */
+
+static bool index_to_params(u64 id, struct coproc_params *params)
+{
+   switch (id  KVM_REG_SIZE_MASK) {
+   case KVM_REG_SIZE_U32:
+   /* Any unused index bits means it's not valid. */
+   if (id  ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK
+  | KVM_REG_ARM_COPROC_MASK
+  | KVM_REG_ARM_32_CRN_MASK
+  | KVM_REG_ARM_CRM_MASK
+  | 

[PATCH v5 11/14] KVM: ARM: VFP userspace interface

2013-01-08 Thread Christoffer Dall
From: Rusty Russell rusty.russ...@linaro.org

We use space #18 for floating point regs.

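User space addresses these registers through KVM_GET_ONE_REG/KVM_SET_ONE_REG
by combining the constants added below; a rough sketch (vcpu_fd is assumed
to be an open vcpu fd, error handling omitted):

/* Sketch: read FPSCR (32-bit control reg) and D0 (64-bit FP reg). */
uint32_t fpscr;
uint64_t d0;
struct kvm_one_reg fpscr_reg = {
	.id   = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_VFP |
		KVM_REG_ARM_VFP_FPSCR,
	.addr = (uint64_t)(unsigned long)&fpscr,
};
struct kvm_one_reg d0_reg = {
	.id   = KVM_REG_ARM | KVM_REG_SIZE_U64 | KVM_REG_ARM_VFP |
		(KVM_REG_ARM_VFP_BASE_REG + 0),
	.addr = (uint64_t)(unsigned long)&d0,
};

ioctl(vcpu_fd, KVM_GET_ONE_REG, &fpscr_reg);
ioctl(vcpu_fd, KVM_GET_ONE_REG, &d0_reg);
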
Reviewed-by: Marcelo Tosatti mtosa...@redhat.com
Signed-off-by: Rusty Russell ru...@rustcorp.com.au
Signed-off-by: Christoffer Dall c.d...@virtualopensystems.com
---
 Documentation/virtual/kvm/api.txt |6 +
 arch/arm/include/uapi/asm/kvm.h   |   12 ++
 arch/arm/kvm/coproc.c |  178 +
 3 files changed, 196 insertions(+)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index 94f17a3..38066a7a 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1808,6 +1808,12 @@ ARM 64-bit CP15 registers have the following id bit 
patterns:
 ARM CCSIDR registers are demultiplexed by CSSELR value:
   0x4002  0011 00 csselr:8
 
+ARM 32-bit VFP control registers have the following id bit patterns:
+  0x4002  0012 1 regno:12
+
+ARM 64-bit FP registers have the following id bit patterns:
+  0x4002  0012 0 regno:12
+
 4.69 KVM_GET_ONE_REG
 
 Capability: KVM_CAP_ONE_REG
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index aa2684c..73b9615 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -112,6 +112,18 @@ struct kvm_arch_memory_slot {
 #define KVM_REG_ARM_DEMUX_VAL_MASK 0x00FF
 #define KVM_REG_ARM_DEMUX_VAL_SHIFT0
 
+/* VFP registers: we could overload CP10 like ARM does, but that's ugly. */
+#define KVM_REG_ARM_VFP(0x0012  
KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM_VFP_MASK   0x
+#define KVM_REG_ARM_VFP_BASE_REG   0x0
+#define KVM_REG_ARM_VFP_FPSID  0x1000
+#define KVM_REG_ARM_VFP_FPSCR  0x1001
+#define KVM_REG_ARM_VFP_MVFR1  0x1006
+#define KVM_REG_ARM_VFP_MVFR0  0x1007
+#define KVM_REG_ARM_VFP_FPEXC  0x1008
+#define KVM_REG_ARM_VFP_FPINST 0x1009
+#define KVM_REG_ARM_VFP_FPINST20x100A
+
 
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT 24
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 1827b64..d782638 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -26,6 +26,8 @@
 #include asm/cacheflush.h
 #include asm/cputype.h
 #include trace/events/kvm.h
+#include asm/vfp.h
+#include ../vfp/vfpinstr.h
 
 #include trace.h
 #include coproc.h
@@ -653,6 +655,170 @@ static int demux_c15_set(u64 id, void __user *uaddr)
}
 }
 
+#ifdef CONFIG_VFPv3
+static const int vfp_sysregs[] = { KVM_REG_ARM_VFP_FPEXC,
+  KVM_REG_ARM_VFP_FPSCR,
+  KVM_REG_ARM_VFP_FPINST,
+  KVM_REG_ARM_VFP_FPINST2,
+  KVM_REG_ARM_VFP_MVFR0,
+  KVM_REG_ARM_VFP_MVFR1,
+  KVM_REG_ARM_VFP_FPSID };
+
+static unsigned int num_fp_regs(void)
+{
+   if (((fmrx(MVFR0)  MVFR0_A_SIMD_MASK)  MVFR0_A_SIMD_BIT) == 2)
+   return 32;
+   else
+   return 16;
+}
+
+static unsigned int num_vfp_regs(void)
+{
+   /* Normal FP regs + control regs. */
+   return num_fp_regs() + ARRAY_SIZE(vfp_sysregs);
+}
+
+static int copy_vfp_regids(u64 __user *uindices)
+{
+   unsigned int i;
+   const u64 u32reg = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_VFP;
+   const u64 u64reg = KVM_REG_ARM | KVM_REG_SIZE_U64 | KVM_REG_ARM_VFP;
+
+   for (i = 0; i  num_fp_regs(); i++) {
+   if (put_user((u64reg | KVM_REG_ARM_VFP_BASE_REG) + i,
+uindices))
+   return -EFAULT;
+   uindices++;
+   }
+
+   for (i = 0; i  ARRAY_SIZE(vfp_sysregs); i++) {
+   if (put_user(u32reg | vfp_sysregs[i], uindices))
+   return -EFAULT;
+   uindices++;
+   }
+
+   return num_vfp_regs();
+}
+
+static int vfp_get_reg(const struct kvm_vcpu *vcpu, u64 id, void __user *uaddr)
+{
+   u32 vfpid = (id  KVM_REG_ARM_VFP_MASK);
+   u32 val;
+
+   /* Fail if we have unknown bits set. */
+   if (id  ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK
+  | ((1  KVM_REG_ARM_COPROC_SHIFT)-1)))
+   return -ENOENT;
+
+   if (vfpid  num_fp_regs()) {
+   if (KVM_REG_SIZE(id) != 8)
+   return -ENOENT;
+   return reg_to_user(uaddr, vcpu-arch.vfp_guest.fpregs[vfpid],
+  id);
+   }
+
+   /* FP control registers are all 32 bit. */
+   if (KVM_REG_SIZE(id) != 4)
+   return -ENOENT;
+
+   switch (vfpid) {
+   case KVM_REG_ARM_VFP_FPEXC:
+   return reg_to_user(uaddr, vcpu-arch.vfp_guest.fpexc, id);
+   case KVM_REG_ARM_VFP_FPSCR:
+   return reg_to_user(uaddr, vcpu-arch.vfp_guest.fpscr, id);
+   

[PATCH v5 14/14] KVM: ARM: Add maintainer entry for KVM/ARM

2013-01-08 Thread Christoffer Dall
Add an entry in the MAINTAINERS file for KVM/ARM.

Cc: Russell King li...@arm.linux.org.uk
Signed-off-by: Christoffer Dall c.d...@virtualopensystems.com
---
 MAINTAINERS |8 
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index fa309ab..8349bac 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4445,6 +4445,14 @@ F:   arch/s390/include/asm/kvm*
 F: arch/s390/kvm/
 F: drivers/s390/kvm/
 
+KERNEL VIRTUAL MACHINE (KVM) FOR ARM
+M: Christoffer Dall christofferd...@gmail.com
+L: kvm...@lists.cs.columbia.edu
+W: http://systems.cs.columbia.edu/projects/kvm-arm
+S: Supported
+F: arch/arm/include/asm/kvm*
+F: arch/arm/kvm/
+
 KEXEC
 M: Eric Biederman ebied...@xmission.com
 W: http://kernel.org/pub/linux/utils/kernel/kexec/



[PATCH v5 13/14] KVM: ARM: Handle I/O aborts

2013-01-08 Thread Christoffer Dall
When the guest accesses I/O memory this will create data abort
exceptions and they are handled by decoding the HSR information
(physical address, read/write, length, register) and forwarding reads
and writes to QEMU which performs the device emulation.

Certain classes of load/store operations do not support the syndrome
information provided in the HSR and we therefore must be able to fetch
the offending instruction from guest memory and decode it manually.

We only support instruction decoding for valid, reasonable MMIO operations
where trapping them does not provide sufficient information in the HSR (no
16-bit Thumb instruction provides register writeback that we care about).

The following instruction types are NOT supported for MMIO operations
despite the HSR not containing decode info:
 - any Load/Store multiple
 - any load/store exclusive
 - any load/store dual
 - anything with the PC as the dest register

This requires changing the general flow somewhat since new calls to run
the VCPU must check if there's a pending MMIO load and perform the write
after userspace has made the data available.

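The changed entry path looks roughly like the fragment below at the top of
kvm_arch_vcpu_ioctl_run() (a simplified sketch; vcpu, run and ret are the
usual locals of that function):

/*
 * Sketch: if the previous exit was an MMIO read, userspace has now filled
 * in run->mmio.data, so copy it into the destination register via
 * kvm_handle_mmio_return() before running the guest again.
 */
if (run->exit_reason == KVM_EXIT_MMIO) {
	ret = kvm_handle_mmio_return(vcpu, run);
	if (ret)
		return ret;
}
/* ... then fall through into the usual world-switch loop ... */
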
Rusty Russell fixed a horrible race pointed out by Ben Herrenschmidt:
(1) Guest complicated mmio instruction traps.
(2) The hardware doesn't tell us enough, so we need to read the actual
instruction which was being executed.
(3) KVM maps the instruction virtual address to a physical address.
(4) The guest (SMP) swaps out that page, and fills it with something else.
(5) We read the physical address, but now that's the wrong thing.

Reviewed-by: Marcelo Tosatti mtosa...@redhat.com
Signed-off-by: Rusty Russell rusty.russ...@linaro.org
Signed-off-by: Marc Zyngier marc.zyng...@arm.com
Signed-off-by: Christoffer Dall c.d...@virtualopensystems.com
---
 arch/arm/include/asm/kvm_arm.h |3 
 arch/arm/include/asm/kvm_asm.h |2 
 arch/arm/include/asm/kvm_decode.h  |   47 
 arch/arm/include/asm/kvm_emulate.h |8 +
 arch/arm/include/asm/kvm_host.h|7 +
 arch/arm/include/asm/kvm_mmio.h|   51 
 arch/arm/kvm/Makefile  |2 
 arch/arm/kvm/arm.c |   14 +
 arch/arm/kvm/decode.c  |  462 
 arch/arm/kvm/emulate.c |  169 +
 arch/arm/kvm/interrupts.S  |   38 +++
 arch/arm/kvm/mmio.c|  154 
 arch/arm/kvm/mmu.c |7 -
 arch/arm/kvm/trace.h   |   21 ++
 14 files changed, 981 insertions(+), 4 deletions(-)
 create mode 100644 arch/arm/include/asm/kvm_decode.h
 create mode 100644 arch/arm/include/asm/kvm_mmio.h
 create mode 100644 arch/arm/kvm/decode.c
 create mode 100644 arch/arm/kvm/mmio.c

diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index 3ff6f22..151c4ce 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -173,8 +173,11 @@
 #define HSR_ISS(HSR_IL - 1)
 #define HSR_ISV_SHIFT  (24)
 #define HSR_ISV(1U  HSR_ISV_SHIFT)
+#define HSR_SRT_SHIFT  (16)
+#define HSR_SRT_MASK   (0xf  HSR_SRT_SHIFT)
 #define HSR_FSC(0x3f)
 #define HSR_FSC_TYPE   (0x3c)
+#define HSR_SSE(1  21)
 #define HSR_WNR(1  6)
 #define HSR_CV_SHIFT   (24)
 #define HSR_CV (1U  HSR_CV_SHIFT)
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 5e06e81..58d787b 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -77,6 +77,8 @@ extern void __kvm_flush_vm_context(void);
 extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+
+extern u64 __kvm_va_to_pa(struct kvm_vcpu *vcpu, u32 va, bool priv);
 #endif
 
 #endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm/include/asm/kvm_decode.h 
b/arch/arm/include/asm/kvm_decode.h
new file mode 100644
index 000..3c37cb9
--- /dev/null
+++ b/arch/arm/include/asm/kvm_decode.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall c.d...@virtualopensystems.com
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __ARM_KVM_DECODE_H__
+#define __ARM_KVM_DECODE_H__
+
+#include linux/types.h
+
+struct kvm_vcpu;
+struct kvm_exit_mmio;
+
+struct kvm_decode {
+   struct pt_regs *regs;
+   unsigned 

[PATCH v5 12/14] KVM: ARM: Handle guest faults in KVM

2013-01-08 Thread Christoffer Dall
Handles the guest faults in KVM by mapping in corresponding user pages
in the 2nd stage page tables.

We invalidate the instruction cache by MVA whenever we map a page to the
guest (no, we cannot only do it when we have an iabt because the guest
may happily read/write a page before hitting the icache) if the hardware
uses VIPT or PIPT.  In the PIPT case, we can invalidate only that
physical page.  In the VIPT case, all bets are off and we simply must
invalidate the whole affair.  Note that VIVT icaches are tagged with
vmids, and we are out of the woods on that one.  Alexander Graf was nice
enough to remind us of this massive pain.

Reviewed-by: Marcelo Tosatti mtosa...@redhat.com
Signed-off-by: Marc Zyngier marc.zyng...@arm.com
Signed-off-by: Christoffer Dall c.d...@virtualopensystems.com
---
 arch/arm/include/asm/kvm_asm.h |2 +
 arch/arm/include/asm/kvm_mmu.h |   12 +++
 arch/arm/kvm/mmu.c |  143 
 arch/arm/kvm/trace.h   |   26 +++
 4 files changed, 182 insertions(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index f6652f6..5e06e81 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -71,6 +71,8 @@ extern char __kvm_hyp_vector[];
 extern char __kvm_hyp_code_start[];
 extern char __kvm_hyp_code_end[];
 
+extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
+
 extern void __kvm_flush_vm_context(void);
 extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
 
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 499e7b0..421a20b 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -35,4 +35,16 @@ void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
 phys_addr_t kvm_mmu_get_httbr(void);
 int kvm_mmu_init(void);
 void kvm_clear_hyp_idmap(void);
+
+static inline bool kvm_is_write_fault(unsigned long hsr)
+{
+   unsigned long hsr_ec = hsr  HSR_EC_SHIFT;
+   if (hsr_ec == HSR_EC_IABT)
+   return false;
+   else if ((hsr  HSR_ISV)  !(hsr  HSR_WNR))
+   return false;
+   else
+   return true;
+}
+
 #endif /* __ARM_KVM_MMU_H__ */
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 4347d68..0ce0e77 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -21,9 +21,11 @@
 #include linux/io.h
 #include asm/idmap.h
 #include asm/pgalloc.h
+#include asm/cacheflush.h
 #include asm/kvm_arm.h
 #include asm/kvm_mmu.h
 #include asm/kvm_asm.h
+#include asm/kvm_emulate.h
 #include asm/mach/map.h
 #include trace/events/kvm.h
 
@@ -488,9 +490,148 @@ out:
return ret;
 }
 
+static void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
+{
+   /*
+* If we are going to insert an instruction page and the icache is
+* either VIPT or PIPT, there is a potential problem where the host
+* (or another VM) may have used the same page as this guest, and we
+* read incorrect data from the icache.  If we're using a PIPT cache,
+* we can invalidate just that page, but if we are using a VIPT cache
+* we need to invalidate the entire icache - damn shame - as written
+* in the ARM ARM (DDI 0406C.b - Page B3-1393).
+*
+* VIVT caches are tagged using both the ASID and the VMID and don't
+* need any kind of flushing (DDI 0406C.b - Page B3-1392).
+*/
+   if (icache_is_pipt()) {
+   unsigned long hva = gfn_to_hva(kvm, gfn);
+   __cpuc_coherent_user_range(hva, hva + PAGE_SIZE);
+   } else if (!icache_is_vivt_asid_tagged()) {
+   /* any kind of VIPT cache */
+   __flush_icache_all();
+   }
+}
+
+static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+ gfn_t gfn, struct kvm_memory_slot *memslot,
+ unsigned long fault_status)
+{
+   pte_t new_pte;
+   pfn_t pfn;
+   int ret;
+   bool write_fault, writable;
+   unsigned long mmu_seq;
+   struct kvm_mmu_memory_cache *memcache = vcpu-arch.mmu_page_cache;
+
+   write_fault = kvm_is_write_fault(vcpu-arch.hsr);
+   if (fault_status == FSC_PERM  !write_fault) {
+   kvm_err(Unexpected L2 read permission error\n);
+   return -EFAULT;
+   }
+
+   /* We need minimum second+third level pages */
+   ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
+   if (ret)
+   return ret;
+
+   mmu_seq = vcpu-kvm-mmu_notifier_seq;
+   /*
+* Ensure the read of mmu_notifier_seq happens before we call
+* gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk
+* the page we just got a reference to gets unmapped before we have a
+* chance to grab the mmu_lock, which ensure that if the page gets
+* unmapped afterwards, the call to kvm_unmap_hva will take it away
+* from us again 

[PATCH v5 00/12] KVM/ARM vGIC support

2013-01-08 Thread Christoffer Dall
The following series implements support for the virtual generic
interrupt controller architecture for KVM/ARM.

This patch series can also be pulled from:
git://github.com/virtualopensystems/linux-kvm-arm.git
branch: kvm-arm-v15-vgic

Changes since v4:
 - Large number of commenting and formatting changes
 - Don't rely on the vgic_hcr default value
 - Fix bug in irq_cfg expand/collapse functions
 - Sanitize use of bitmaps
 - Do not use elrsr when queueing an interrupt
 - Deassert SGIs and SPIs that have already been queued
 - Set VMCR to 0 on vcpu init
 - Compute number of list registers once at init time
 - Restructure bytemap/bitmap accessor functions
 - Improve readability by adding accessor functions
 - Define GICH_ constants in gic.h and use these in the vgic code
 - Explain SGI handling in comment
 - Error path and return value fixes and cleanups
 - Use defines instead of all hardcoded IRQ numbers
 - Refactor level interrupt handling
 - Fix several maintenance-interrupt related bugs
 - Add CPU hotplug notifier

Changes since v3:
 - Change struct kvm_device_addr to use 64 bits for ID field

Changes since v2:
 - Get rid of hardcoded guest cpu and distributor physical addresses
   and instead provide the address through the KVM_SET_DEVICE_ADDRESS
   ioctl.
 - Fix level/edge bugs
 - Fix reboot bug: retire queued, disabled interrupts

---

Christoffer Dall (2):
  KVM: ARM: Introduce KVM_SET_DEVICE_ADDRESS ioctl
  ARM: KVM: VGIC accept vcpu and dist base addresses from user space

Marc Zyngier (10):
  ARM: KVM: Keep track of currently running vcpus
  ARM: gic: define GICH offsets for VGIC support
  ARM: KVM: Initial VGIC infrastructure code
  ARM: KVM: VGIC distributor handling
  ARM: KVM: VGIC virtual CPU interface management
  ARM: KVM: vgic: retire queued, disabled interrupts
  ARM: KVM: VGIC interrupt injection
  ARM: KVM: VGIC control interface world switch
  ARM: KVM: VGIC initialisation code
  ARM: KVM: Add VGIC configuration option


 Documentation/virtual/kvm/api.txt   |   38 +
 arch/arm/include/asm/hardware/gic.h |   25 +
 arch/arm/include/asm/kvm_host.h |   18 
 arch/arm/include/asm/kvm_vgic.h |  220 +
 arch/arm/include/uapi/asm/kvm.h |   16 
 arch/arm/kernel/asm-offsets.c   |   12 
 arch/arm/kvm/Kconfig|8 
 arch/arm/kvm/Makefile   |1 
 arch/arm/kvm/arm.c  |  162 
 arch/arm/kvm/interrupts.S   |4 
 arch/arm/kvm/interrupts_head.S  |   74 ++
 arch/arm/kvm/mmio.c |3 
 arch/arm/kvm/vgic.c | 1492 +++
 include/uapi/linux/kvm.h|8 
 14 files changed, 2069 insertions(+), 12 deletions(-)
 create mode 100644 arch/arm/include/asm/kvm_vgic.h
 create mode 100644 arch/arm/kvm/vgic.c

-- 


[PATCH v5 01/12] KVM: ARM: Introduce KVM_SET_DEVICE_ADDRESS ioctl

2013-01-08 Thread Christoffer Dall
On ARM (and possibly other architectures) some bits are specific to the
model being emulated for the guest and user space needs a way to tell
the kernel about those bits.  An example is mmio device base addresses,
where KVM must know the base address for a given device to properly
emulate mmio accesses within a certain address range or directly map a
device with virtualization extensions into the guest address space.

We try to make this API slightly more generic than for our specific use,
but so far only the VGIC uses this feature.

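A VMM would use it roughly as sketched below; the guest physical addresses
are made-up examples, while the constants come from the uapi header added
by this patch:

/* Sketch: place the VGIC distributor and CPU interface for the guest. */
static int set_vgic_addrs(int vm_fd)
{
	struct kvm_device_address dist = {
		.id   = ((uint64_t)KVM_ARM_DEVICE_VGIC_V2 << KVM_DEVICE_ID_SHIFT) |
			KVM_VGIC_V2_ADDR_TYPE_DIST,
		.addr = 0x2c001000,	/* example guest-physical address */
	};
	struct kvm_device_address cpu = {
		.id   = ((uint64_t)KVM_ARM_DEVICE_VGIC_V2 << KVM_DEVICE_ID_SHIFT) |
			KVM_VGIC_V2_ADDR_TYPE_CPU,
		.addr = 0x2c002000,	/* example guest-physical address */
	};

	/* Must happen after KVM_CREATE_IRQCHIP and before any KVM_RUN. */
	if (ioctl(vm_fd, KVM_SET_DEVICE_ADDRESS, &dist) < 0)
		return -1;
	return ioctl(vm_fd, KVM_SET_DEVICE_ADDRESS, &cpu);
}
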
Signed-off-by: Christoffer Dall c.d...@virtualopensystems.com
---
 Documentation/virtual/kvm/api.txt |   37 +
 arch/arm/include/uapi/asm/kvm.h   |   13 +
 arch/arm/kvm/arm.c|   23 ++-
 include/uapi/linux/kvm.h  |8 
 4 files changed, 80 insertions(+), 1 deletion(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index 38066a7a..668956f 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2206,6 +2206,43 @@ This ioctl returns the guest registers that are 
supported for the
 KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
 
 
+4.80 KVM_SET_DEVICE_ADDRESS
+
+Capability: KVM_CAP_SET_DEVICE_ADDRESS
+Architectures: arm
+Type: vm ioctl
+Parameters: struct kvm_device_address (in)
+Returns: 0 on success, -1 on error
+Errors:
+  ENODEV: The device id is unknown
+  ENXIO:  Device not supported on current system
+  EEXIST: Address already set
+  E2BIG:  Address outside guest physical address space
+
+struct kvm_device_address {
+   __u64 id;
+   __u64 addr;
+};
+
+Specify a device address in the guest's physical address space where guests
+can access emulated or directly exposed devices, which the host kernel needs
+to know about. The id field is an architecture specific identifier for a
+specific device.
+
+ARM divides the id field into two parts, a device id and an address type id
+specific to the individual device.
+
+  bits:  | 63  ...  32 | 31 ... 16 | 15 ... 0     |
+  field: | 0x00000000  | device id | addr type id |
+
+ARM currently only requires this when using the in-kernel GIC support for the
+hardware VGIC features, using KVM_ARM_DEVICE_VGIC_V2 as the device id.  When
+setting the base address for the guest's mapping of the VGIC virtual CPU
+and distributor interface, the ioctl must be called after calling
+KVM_CREATE_IRQCHIP, but before calling KVM_RUN on any of the VCPUs.  Calling
+this ioctl twice for any of the base addresses will return -EEXIST.
+
+
 5. The kvm_run structure
 
 
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 73b9615..09911a7 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -65,6 +65,19 @@ struct kvm_regs {
 #define KVM_ARM_TARGET_CORTEX_A15  0
 #define KVM_ARM_NUM_TARGETS1
 
+/* KVM_SET_DEVICE_ADDRESS ioctl id encoding */
+#define KVM_DEVICE_TYPE_SHIFT  0
+#define KVM_DEVICE_TYPE_MASK   (0x  KVM_DEVICE_TYPE_SHIFT)
+#define KVM_DEVICE_ID_SHIFT16
+#define KVM_DEVICE_ID_MASK (0x  KVM_DEVICE_ID_SHIFT)
+
+/* Supported device IDs */
+#define KVM_ARM_DEVICE_VGIC_V2 0
+
+/* Supported VGIC address types  */
+#define KVM_VGIC_V2_ADDR_TYPE_DIST 0
+#define KVM_VGIC_V2_ADDR_TYPE_CPU  1
+
 struct kvm_vcpu_init {
__u32 target;
__u32 features[7];
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index f42d828..2f39b04 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -165,6 +165,9 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_COALESCED_MMIO:
r = KVM_COALESCED_MMIO_PAGE_OFFSET;
break;
+   case KVM_CAP_SET_DEVICE_ADDR:
+   r = 1;
+   break;
case KVM_CAP_NR_VCPUS:
r = num_online_cpus();
break;
@@ -805,10 +807,29 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct 
kvm_dirty_log *log)
return -EINVAL;
 }
 
+static int kvm_vm_ioctl_set_device_address(struct kvm *kvm,
+  struct kvm_device_address *dev_addr)
+{
+   return -ENODEV;
+}
+
 long kvm_arch_vm_ioctl(struct file *filp,
   unsigned int ioctl, unsigned long arg)
 {
-   return -EINVAL;
+   struct kvm *kvm = filp-private_data;
+   void __user *argp = (void __user *)arg;
+
+   switch (ioctl) {
+   case KVM_SET_DEVICE_ADDRESS: {
+   struct kvm_device_address dev_addr;
+
+   if (copy_from_user(dev_addr, argp, sizeof(dev_addr)))
+   return -EFAULT;
+   return kvm_vm_ioctl_set_device_address(kvm, dev_addr);
+   }
+   default:
+   return -EINVAL;
+   }
 }
 
 static void cpu_init_hyp_mode(void *vector)
diff --git a/include/uapi/linux/kvm.h 

[PATCH v5 02/12] ARM: KVM: Keep track of currently running vcpus

2013-01-08 Thread Christoffer Dall
From: Marc Zyngier marc.zyng...@arm.com

When an interrupt occurs for the guest, it is sometimes necessary
to find out which vcpu was running at that point.

Keep track of which vcpu is being run in kvm_arch_vcpu_ioctl_run(),
and allow the data to be retrieved using either:
- kvm_arm_get_running_vcpu(): returns the vcpu running at this point
  on the current CPU. Can only be used in a non-preemptible context.
- kvm_arm_get_running_vcpus(): returns the per-CPU variable holding
  the running vcpus, usable for per-CPU interrupts.

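A typical consumer is a per-CPU interrupt handler that wants the vcpu it
interrupted, along the lines of this sketch (the handler itself is
hypothetical; irqreturn_t comes from <linux/interrupt.h>):

/* Sketch: resolve the interrupted vcpu from a per-CPU interrupt handler. */
static irqreturn_t example_percpu_irq_handler(int irq, void *dev_id)
{
	struct kvm_vcpu *vcpu = kvm_arm_get_running_vcpu();

	if (!vcpu)	/* interrupt fired while no guest was running */
		return IRQ_HANDLED;

	/* ... update emulated device state, kick the vcpu, etc. ... */
	return IRQ_HANDLED;
}
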
Signed-off-by: Marc Zyngier marc.zyng...@arm.com
Signed-off-by: Christoffer Dall c.d...@virtualopensystems.com
---
 arch/arm/include/asm/kvm_host.h |   10 ++
 arch/arm/kvm/arm.c  |   30 ++
 2 files changed, 40 insertions(+)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index ca40795..7a9f1d5 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -155,4 +155,14 @@ static inline int kvm_test_age_hva(struct kvm *kvm, 
unsigned long hva)
 {
return 0;
 }
+
+struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
+struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
+
+int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
+unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu);
+struct kvm_one_reg;
+int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
+int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
+
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 2f39b04..5180f7b 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -53,11 +53,38 @@ static DEFINE_PER_CPU(unsigned long, 
kvm_arm_hyp_stack_page);
 static struct vfp_hard_struct __percpu *kvm_host_vfp_state;
 static unsigned long hyp_default_vectors;
 
+/* Per-CPU variable containing the currently running vcpu. */
+static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu);
+
 /* The VMID used in the VTTBR */
 static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
 static u8 kvm_next_vmid;
 static DEFINE_SPINLOCK(kvm_vmid_lock);
 
+static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
+{
+   BUG_ON(preemptible());
+   __get_cpu_var(kvm_arm_running_vcpu) = vcpu;
+}
+
+/**
+ * kvm_arm_get_running_vcpu - get the vcpu running on the current CPU.
+ * Must be called from non-preemptible context
+ */
+struct kvm_vcpu *kvm_arm_get_running_vcpu(void)
+{
+   BUG_ON(preemptible());
+   return __get_cpu_var(kvm_arm_running_vcpu);
+}
+
+/**
+ * kvm_arm_get_running_vcpus - get the per-CPU array of currently running 
vcpus.
+ */
+struct kvm_vcpu __percpu **kvm_get_running_vcpus(void)
+{
+   return kvm_arm_running_vcpu;
+}
+
 int kvm_arch_hardware_enable(void *garbage)
 {
return 0;
@@ -310,10 +337,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
cpumask_clear_cpu(cpu, vcpu-arch.require_dcache_flush);
flush_cache_all(); /* We'd really want v7_flush_dcache_all() */
}
+
+   kvm_arm_set_running_vcpu(vcpu);
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
+   kvm_arm_set_running_vcpu(NULL);
 }
 
 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,



[PATCH v5 03/12] ARM: gic: define GICH offsets for VGIC support

2013-01-08 Thread Christoffer Dall
From: Marc Zyngier marc.zyng...@arm.com

The GICH_* constants are defined by the GIC HW spec, and even though
they will only be used by KVM to begin with, define them generically in gic.h.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
Signed-off-by: Christoffer Dall c.d...@virtualopensystems.com
---
 arch/arm/include/asm/hardware/gic.h |   25 +
 1 file changed, 25 insertions(+)

diff --git a/arch/arm/include/asm/hardware/gic.h 
b/arch/arm/include/asm/hardware/gic.h
index 4b1ce6c..dd1add1 100644
--- a/arch/arm/include/asm/hardware/gic.h
+++ b/arch/arm/include/asm/hardware/gic.h
@@ -32,6 +32,31 @@
 #define GIC_DIST_CONFIG0xc00
 #define GIC_DIST_SOFTINT   0xf00
 
+#define GICH_HCR   0x0
+#define GICH_VTR   0x4
+#define GICH_VMCR  0x8
+#define GICH_MISR  0x10
+#define GICH_EISR0 0x20
+#define GICH_EISR1 0x24
+#define GICH_ELRSR00x30
+#define GICH_ELRSR10x34
+#define GICH_APR   0xf0
+#define GICH_LR0   0x100
+
+#define GICH_HCR_EN(1  0)
+#define GICH_HCR_UIE   (1  1)
+
+#define GICH_LR_VIRTUALID  (0x3ff  0)
+#define GICH_LR_PHYSID_CPUID_SHIFT (10)
+#define GICH_LR_PHYSID_CPUID   (7  GICH_LR_PHYSID_CPUID_SHIFT)
+#define GICH_LR_STATE  (3  28)
+#define GICH_LR_PENDING_BIT(1  28)
+#define GICH_LR_ACTIVE_BIT (1  29)
+#define GICH_LR_EOI(1  19)
+
+#define GICH_MISR_EOI  (1  0)
+#define GICH_MISR_U(1  1)
+
 #ifndef __ASSEMBLY__
 #include linux/irqdomain.h
 struct device_node;



[PATCH v5 04/12] ARM: KVM: Initial VGIC infrastructure code

2013-01-08 Thread Christoffer Dall
From: Marc Zyngier marc.zyng...@arm.com

Wire the basic framework code for VGIC support and the initial in-kernel
MMIO support code for the VGIC, used for the distributor emulation.

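The hook into the MMIO path is conceptually this small (a sketch of the
call site added to mmio.c; returning 1 means the access was handled
in-kernel, 0 means exit to user space as before):

/* Sketch: let the in-kernel VGIC claim an MMIO access before exiting. */
static int handle_guest_mmio_sketch(struct kvm_vcpu *vcpu, struct kvm_run *run,
				    struct kvm_exit_mmio *mmio)
{
	if (vgic_handle_mmio(vcpu, run, mmio))
		return 1;	/* handled in the kernel, resume the guest */

	/* Otherwise fall back to the usual user-space MMIO exit. */
	kvm_prepare_mmio(run, mmio);
	return 0;
}
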
Signed-off-by: Marc Zyngier marc.zyng...@arm.com
Signed-off-by: Christoffer Dall c.d...@virtualopensystems.com
---
 arch/arm/include/asm/kvm_host.h |8 ++
 arch/arm/include/asm/kvm_vgic.h |   80 ++
 arch/arm/kvm/Makefile   |1 
 arch/arm/kvm/arm.c  |   27 +++
 arch/arm/kvm/interrupts.S   |4 +
 arch/arm/kvm/mmio.c |3 +
 arch/arm/kvm/vgic.c |  144 +++
 7 files changed, 266 insertions(+), 1 deletion(-)
 create mode 100644 arch/arm/include/asm/kvm_vgic.h
 create mode 100644 arch/arm/kvm/vgic.c

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 7a9f1d5..149d62b 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -37,6 +37,8 @@
 #define KVM_NR_PAGE_SIZES  1
 #define KVM_PAGES_PER_HPAGE(x) (1UL31)
 
+#include asm/kvm_vgic.h
+
 struct kvm_vcpu;
 u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
 int kvm_target_cpu(void);
@@ -58,6 +60,9 @@ struct kvm_arch {
 
/* Stage-2 page table */
pgd_t *pgd;
+
+   /* Interrupt controller */
+   struct vgic_distvgic;
 };
 
 #define KVM_NR_MEM_OBJS 40
@@ -92,6 +97,9 @@ struct kvm_vcpu_arch {
struct vfp_hard_struct vfp_guest;
struct vfp_hard_struct *vfp_host;
 
+   /* VGIC state */
+   struct vgic_cpu vgic_cpu;
+
/*
 * Anything that is not used directly from assembly code goes
 * here.
diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h
new file mode 100644
index 000..fcfd530
--- /dev/null
+++ b/arch/arm/include/asm/kvm_vgic.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Marc Zyngier marc.zyng...@arm.com
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __ASM_ARM_KVM_VGIC_H
+#define __ASM_ARM_KVM_VGIC_H
+
+#include asm/hardware/gic.h
+
+struct vgic_dist {
+};
+
+struct vgic_cpu {
+};
+
+struct kvm;
+struct kvm_vcpu;
+struct kvm_run;
+struct kvm_exit_mmio;
+
+#ifdef CONFIG_KVM_ARM_VGIC
+bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ struct kvm_exit_mmio *mmio);
+
+#else
+static inline int kvm_vgic_hyp_init(void)
+{
+   return 0;
+}
+
+static inline int kvm_vgic_init(struct kvm *kvm)
+{
+   return 0;
+}
+
+static inline int kvm_vgic_create(struct kvm *kvm)
+{
+   return 0;
+}
+
+static inline int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
+{
+   return 0;
+}
+
+static inline void kvm_vgic_sync_to_cpu(struct kvm_vcpu *vcpu) {}
+static inline void kvm_vgic_sync_from_cpu(struct kvm_vcpu *vcpu) {}
+
+static inline int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
+{
+   return 0;
+}
+
+static inline bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
+   struct kvm_exit_mmio *mmio)
+{
+   return false;
+}
+
+static inline int irqchip_in_kernel(struct kvm *kvm)
+{
+   return 0;
+}
+#endif
+
+#endif
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index 44a5f4b..3c6620c 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -19,3 +19,4 @@ kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o 
coalesced_mmio.o)
 obj-y += kvm-arm.o init.o interrupts.o
 obj-y += arm.o guest.o mmu.o emulate.o reset.o
 obj-y += coproc.o coproc_a15.o mmio.o decode.o
+obj-$(CONFIG_KVM_ARM_VGIC) += vgic.o
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 5180f7b..85c4cdf 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -61,6 +61,8 @@ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
 static u8 kvm_next_vmid;
 static DEFINE_SPINLOCK(kvm_vmid_lock);
 
+static bool vgic_present;
+
 static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
 {
BUG_ON(preemptible());
@@ -183,6 +185,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 {
int r;
switch (ext) {
+   case KVM_CAP_IRQCHIP:
+   r = vgic_present;
+   break;
case KVM_CAP_USER_MEMORY:
case KVM_CAP_SYNC_MMU:
case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
@@ -313,8 +318,16 @@ int __attribute_const__ 

[PATCH v5 06/12] ARM: KVM: VGIC distributor handling

2013-01-08 Thread Christoffer Dall
From: Marc Zyngier marc.zyng...@arm.com

Add the GIC distributor emulation code. A number of the GIC features
are simply ignored as they are not required to boot a Linux guest.

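The distributor state lives in vgic_bitmap/vgic_bytemap structures that
split every register bank into a per-CPU part (SGIs + PPIs, irq < 32) and a
shared part (SPIs). Conceptually the bit accessors look like this sketch
(the name only approximates the real helpers in vgic.c):

/* Sketch: one bit of per-IRQ state, split between private and shared IRQs. */
static int vgic_bitmap_get_bit_sketch(struct vgic_bitmap *x, int cpuid, int irq)
{
	if (irq < VGIC_NR_PRIVATE_IRQS)
		return test_bit(irq, x->percpu[cpuid].reg_ul);

	return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared.reg_ul);
}
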
Signed-off-by: Marc Zyngier marc.zyng...@arm.com
Signed-off-by: Christoffer Dall c.d...@virtualopensystems.com
---
 arch/arm/include/asm/kvm_vgic.h |   82 +
 arch/arm/kvm/vgic.c |  593 +++
 2 files changed, 674 insertions(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h
index 270dcd2..9ff0d9c 100644
--- a/arch/arm/include/asm/kvm_vgic.h
+++ b/arch/arm/include/asm/kvm_vgic.h
@@ -19,12 +19,94 @@
 #ifndef __ASM_ARM_KVM_VGIC_H
 #define __ASM_ARM_KVM_VGIC_H
 
+#include linux/kernel.h
+#include linux/kvm.h
+#include linux/kvm_host.h
+#include linux/irqreturn.h
+#include linux/spinlock.h
+#include linux/types.h
 #include asm/hardware/gic.h
 
+#define VGIC_NR_IRQS   128
+#define VGIC_NR_SGIS   16
+#define VGIC_NR_PPIS   16
+#define VGIC_NR_PRIVATE_IRQS   (VGIC_NR_SGIS + VGIC_NR_PPIS)
+#define VGIC_NR_SHARED_IRQS(VGIC_NR_IRQS - VGIC_NR_PRIVATE_IRQS)
+#define VGIC_MAX_CPUS  KVM_MAX_VCPUS
+
+/* Sanity checks... */
+#if (VGIC_MAX_CPUS  8)
+#error Invalid number of CPU interfaces
+#endif
+
+#if (VGIC_NR_IRQS  31)
+#error VGIC_NR_IRQS must be a multiple of 32
+#endif
+
+#if (VGIC_NR_IRQS  1024)
+#error VGIC_NR_IRQS must be = 1024
+#endif
+
+/*
+ * The GIC distributor registers describing interrupts have two parts:
+ * - 32 per-CPU interrupts (SGI + PPI)
+ * - a bunch of shared interrupts (SPI)
+ */
+struct vgic_bitmap {
+   union {
+   u32 reg[VGIC_NR_PRIVATE_IRQS / 32];
+   DECLARE_BITMAP(reg_ul, VGIC_NR_PRIVATE_IRQS);
+   } percpu[VGIC_MAX_CPUS];
+   union {
+   u32 reg[VGIC_NR_SHARED_IRQS / 32];
+   DECLARE_BITMAP(reg_ul, VGIC_NR_SHARED_IRQS);
+   } shared;
+};
+
+struct vgic_bytemap {
+   u32 percpu[VGIC_MAX_CPUS][VGIC_NR_PRIVATE_IRQS / 4];
+   u32 shared[VGIC_NR_SHARED_IRQS  / 4];
+};
+
 struct vgic_dist {
+#ifdef CONFIG_KVM_ARM_VGIC
+   spinlock_t  lock;
+
+   /* Virtual control interface mapping */
+   void __iomem*vctrl_base;
+
/* Distributor and vcpu interface mapping in the guest */
phys_addr_t vgic_dist_base;
phys_addr_t vgic_cpu_base;
+
+   /* Distributor enabled */
+   u32 enabled;
+
+   /* Interrupt enabled (one bit per IRQ) */
+   struct vgic_bitmap  irq_enabled;
+
+   /* Interrupt 'pin' level */
+   struct vgic_bitmap  irq_state;
+
+   /* Level-triggered interrupt in progress */
+   struct vgic_bitmap  irq_active;
+
+   /* Interrupt priority. Not used yet. */
+   struct vgic_bytemap irq_priority;
+
+   /* Level/edge triggered */
+   struct vgic_bitmap  irq_cfg;
+
+   /* Source CPU per SGI and target CPU */
+   u8  irq_sgi_sources[VGIC_MAX_CPUS][16];
+
+   /* Target CPU for each IRQ */
+   u8  irq_spi_cpu[VGIC_NR_SHARED_IRQS];
+   struct vgic_bitmap  irq_spi_target[VGIC_MAX_CPUS];
+
+   /* Bitmap indicating which CPU has something pending */
+   unsigned long   irq_pending_on_cpu;
+#endif
 };
 
 struct vgic_cpu {
diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
index cdb7671..bd2bd7f 100644
--- a/arch/arm/kvm/vgic.c
+++ b/arch/arm/kvm/vgic.c
@@ -22,6 +22,43 @@
 #include linux/io.h
 #include asm/kvm_emulate.h
 
+/*
+ * How the whole thing works (courtesy of Christoffer Dall):
+ *
+ * - At any time, the dist-irq_pending_on_cpu is the oracle that knows if
+ *   something is pending
+ * - VGIC pending interrupts are stored on the vgic.irq_state vgic
+ *   bitmap (this bitmap is updated by both user land ioctls and guest
+ *   mmio ops, and other in-kernel peripherals such as the
+ *   arch. timers) and indicate the 'wire' state.
+ * - Every time the bitmap changes, the irq_pending_on_cpu oracle is
+ *   recalculated
+ * - To calculate the oracle, we need info for each cpu from
+ *   compute_pending_for_cpu, which considers:
+ *   - PPI: dist-irq_state  dist-irq_enable
+ *   - SPI: dist-irq_state  dist-irq_enable  dist-irq_spi_target
+ *   - irq_spi_target is a 'formatted' version of the GICD_ICFGR
+ * registers, stored on each vcpu. We only keep one bit of
+ * information per interrupt, making sure that only one vcpu can
+ * accept the interrupt.
+ * - The same is true when injecting an interrupt, except that we only
+ *   consider a single interrupt at a time. The irq_spi_cpu array
+ *   contains the target CPU for each SPI.
+ *
+ * The handling of level interrupts adds some extra complexity. We
+ * need to track when the interrupt has been EOIed, so we can sample
+ * the 'line' again. This is achieved as such:
+ *
+ * - When a level interrupt is moved onto a 

[PATCH v5 07/12] ARM: KVM: VGIC virtual CPU interface management

2013-01-08 Thread Christoffer Dall
From: Marc Zyngier marc.zyng...@arm.com

Add VGIC virtual CPU interface code, picking pending interrupts
from the distributor and stashing them in the VGIC control interface
list registers.

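The flush path boils down to: for each pending interrupt, grab a free list
register and mark it pending, roughly as sketched below (simplified; the
real code also reuses an LR that already holds the same interrupt):

/* Sketch: queue one pending IRQ into a free GICH list register. */
static bool vgic_queue_irq_sketch(struct kvm_vcpu *vcpu, u8 sgi_source, int irq)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	int lr;

	lr = find_first_zero_bit(vgic_cpu->lr_used, vgic_cpu->nr_lr);
	if (lr >= vgic_cpu->nr_lr)
		return false;		/* all LRs busy, try again later */

	vgic_cpu->vgic_lr[lr] = MK_LR_PEND(sgi_source, irq);
	vgic_cpu->vgic_irq_lr_map[irq] = lr;
	set_bit(lr, vgic_cpu->lr_used);
	return true;
}
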
Signed-off-by: Marc Zyngier marc.zyng...@arm.com
Signed-off-by: Christoffer Dall c.d...@virtualopensystems.com
---
 arch/arm/include/asm/kvm_vgic.h |   30 
 arch/arm/kvm/vgic.c |  327 +++
 2 files changed, 356 insertions(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h
index 9ff0d9c..b3133c4 100644
--- a/arch/arm/include/asm/kvm_vgic.h
+++ b/arch/arm/include/asm/kvm_vgic.h
@@ -110,8 +110,33 @@ struct vgic_dist {
 };
 
 struct vgic_cpu {
+#ifdef CONFIG_KVM_ARM_VGIC
+   /* per IRQ to LR mapping */
+   u8  vgic_irq_lr_map[VGIC_NR_IRQS];
+
+   /* Pending interrupts on this VCPU */
+   DECLARE_BITMAP( pending_percpu, VGIC_NR_PRIVATE_IRQS);
+   DECLARE_BITMAP( pending_shared, VGIC_NR_SHARED_IRQS);
+
+   /* Bitmap of used/free list registers */
+   DECLARE_BITMAP( lr_used, 64);
+
+   /* Number of list registers on this CPU */
+   int nr_lr;
+
+   /* CPU vif control registers for world switch */
+   u32 vgic_hcr;
+   u32 vgic_vmcr;
+   u32 vgic_misr;  /* Saved only */
+   u32 vgic_eisr[2];   /* Saved only */
+   u32 vgic_elrsr[2];  /* Saved only */
+   u32 vgic_apr;
+   u32 vgic_lr[64];/* A15 has only 4... */
+#endif
 };
 
+#define LR_EMPTY   0xff
+
 struct kvm;
 struct kvm_vcpu;
 struct kvm_run;
@@ -119,9 +144,14 @@ struct kvm_exit_mmio;
 
 #ifdef CONFIG_KVM_ARM_VGIC
 int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr);
+void kvm_vgic_sync_to_cpu(struct kvm_vcpu *vcpu);
+void kvm_vgic_sync_from_cpu(struct kvm_vcpu *vcpu);
+int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
 bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
  struct kvm_exit_mmio *mmio);
 
+#define irqchip_in_kernel(k)   (!!((k)-arch.vgic.vctrl_base))
+
 #else
 static inline int kvm_vgic_hyp_init(void)
 {
diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
index bd2bd7f..58237d5 100644
--- a/arch/arm/kvm/vgic.c
+++ b/arch/arm/kvm/vgic.c
@@ -152,6 +152,34 @@ static int vgic_irq_is_enabled(struct kvm_vcpu *vcpu, int 
irq)
return vgic_bitmap_get_irq_val(dist-irq_enabled, vcpu-vcpu_id, irq);
 }
 
+static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq)
+{
+   struct vgic_dist *dist = vcpu-kvm-arch.vgic;
+
+   return vgic_bitmap_get_irq_val(dist-irq_active, vcpu-vcpu_id, irq);
+}
+
+static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq)
+{
+   struct vgic_dist *dist = vcpu-kvm-arch.vgic;
+
+   vgic_bitmap_set_irq_val(dist-irq_active, vcpu-vcpu_id, irq, 1);
+}
+
+static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq)
+{
+   struct vgic_dist *dist = vcpu-kvm-arch.vgic;
+
+   vgic_bitmap_set_irq_val(dist-irq_active, vcpu-vcpu_id, irq, 0);
+}
+
+static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
+{
+   struct vgic_dist *dist = vcpu-kvm-arch.vgic;
+
+   return vgic_bitmap_get_irq_val(dist-irq_state, vcpu-vcpu_id, irq);
+}
+
 static void vgic_dist_irq_set(struct kvm_vcpu *vcpu, int irq)
 {
struct vgic_dist *dist = vcpu-kvm-arch.vgic;
@@ -711,7 +739,30 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 
reg)
 
 static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
 {
-   return 0;
+   struct vgic_dist *dist = vcpu-kvm-arch.vgic;
+   unsigned long *pending, *enabled, *pend_percpu, *pend_shared;
+   unsigned long pending_private, pending_shared;
+   int vcpu_id;
+
+   vcpu_id = vcpu-vcpu_id;
+   pend_percpu = vcpu-arch.vgic_cpu.pending_percpu;
+   pend_shared = vcpu-arch.vgic_cpu.pending_shared;
+
+   pending = vgic_bitmap_get_cpu_map(dist-irq_state, vcpu_id);
+   enabled = vgic_bitmap_get_cpu_map(dist-irq_enabled, vcpu_id);
+   bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS);
+
+   pending = vgic_bitmap_get_shared_map(dist-irq_state);
+   enabled = vgic_bitmap_get_shared_map(dist-irq_enabled);
+   bitmap_and(pend_shared, pending, enabled, VGIC_NR_SHARED_IRQS);
+   bitmap_and(pend_shared, pend_shared,
+  vgic_bitmap_get_shared_map(dist-irq_spi_target[vcpu_id]),
+  VGIC_NR_SHARED_IRQS);
+
+   pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS);
+   pending_shared = find_first_bit(pend_shared, VGIC_NR_SHARED_IRQS);
+   return (pending_private  VGIC_NR_PRIVATE_IRQS ||
+   pending_shared  VGIC_NR_SHARED_IRQS);
 }
 
 /*
@@ -737,6 +788,280 @@ static void vgic_update_state(struct kvm *kvm)
}
 }
 
+#define LR_CPUID(lr)   \
+   (((lr)  GICH_LR_PHYSID_CPUID)  

[PATCH v5 08/12] ARM: KVM: vgic: retire queued, disabled interrupts

2013-01-08 Thread Christoffer Dall
From: Marc Zyngier marc.zyng...@arm.com

An interrupt may have been disabled after being made pending on the
CPU interface (the classic case is a timer running while we're
rebooting the guest - the interrupt would kick as soon as the CPU
interface gets enabled, with deadly consequences).

The solution is to examine already active LRs, and check the
interrupt is still enabled. If not, just retire it.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
Signed-off-by: Christoffer Dall c.d...@virtualopensystems.com
---
 arch/arm/kvm/vgic.c |   30 ++
 1 file changed, 30 insertions(+)

diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
index 58237d5..49e8b27 100644
--- a/arch/arm/kvm/vgic.c
+++ b/arch/arm/kvm/vgic.c
@@ -71,6 +71,7 @@
 #define ACCESS_WRITE_VALUE (3  1)
 #define ACCESS_WRITE_MASK(x)   ((x)  (3  1))
 
+static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
 static void vgic_update_state(struct kvm *kvm);
 static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
 
@@ -344,6 +345,7 @@ static bool handle_mmio_clear_enable_reg(struct kvm_vcpu 
*vcpu,
if (mmio-is_write) {
if (offset  4) /* Force SGI enabled */
*reg |= 0x;
+   vgic_retire_disabled_irqs(vcpu);
vgic_update_state(vcpu-kvm);
return true;
}
@@ -792,6 +794,34 @@ static void vgic_update_state(struct kvm *kvm)
(((lr)  GICH_LR_PHYSID_CPUID)  GICH_LR_PHYSID_CPUID_SHIFT)
 #define MK_LR_PEND(src, irq)   \
(GICH_LR_PENDING_BIT | ((src)  GICH_LR_PHYSID_CPUID_SHIFT) | (irq))
+
+/*
+ * An interrupt may have been disabled after being made pending on the
+ * CPU interface (the classic case is a timer running while we're
+ * rebooting the guest - the interrupt would kick as soon as the CPU
+ * interface gets enabled, with deadly consequences).
+ *
+ * The solution is to examine already active LRs, and check the
+ * interrupt is still enabled. If not, just retire it.
+ */
+static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
+{
+   struct vgic_cpu *vgic_cpu = vcpu-arch.vgic_cpu;
+   int lr;
+
+   for_each_set_bit(lr, vgic_cpu-lr_used, vgic_cpu-nr_lr) {
+   int irq = vgic_cpu-vgic_lr[lr]  GICH_LR_VIRTUALID;
+
+   if (!vgic_irq_is_enabled(vcpu, irq)) {
+   vgic_cpu-vgic_irq_lr_map[irq] = LR_EMPTY;
+   clear_bit(lr, vgic_cpu-lr_used);
+   vgic_cpu-vgic_lr[lr] = ~GICH_LR_STATE;
+   if (vgic_irq_is_active(vcpu, irq))
+   vgic_irq_clear_active(vcpu, irq);
+   }
+   }
+}
+
 /*
  * Queue an interrupt to a CPU virtual interface. Return true on success,
  * or false if it wasn't possible to queue it.



[PATCH v5 09/12] ARM: KVM: VGIC interrupt injection

2013-01-08 Thread Christoffer Dall
From: Marc Zyngier marc.zyng...@arm.com

Plug the interrupt injection code. Interrupts can now be generated
from user space.

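From user space this is driven through KVM_IRQ_LINE on the VM fd; a sketch
for an SPI is below (the wrapper is made up; irq_num uses GIC numbering,
i.e. 32 and up for SPIs):

/* Sketch: assert or deassert a shared peripheral interrupt (SPI). */
static int set_spi_level(int vm_fd, unsigned int irq_num, int level)
{
	struct kvm_irq_level irq_level = {
		.irq   = (KVM_ARM_IRQ_TYPE_SPI << KVM_ARM_IRQ_TYPE_SHIFT) |
			 irq_num,
		.level = level,
	};

	return ioctl(vm_fd, KVM_IRQ_LINE, &irq_level);
}
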
Signed-off-by: Marc Zyngier marc.zyng...@arm.com
Signed-off-by: Christoffer Dall c.d...@virtualopensystems.com
---
 arch/arm/include/asm/kvm_vgic.h |8 +++
 arch/arm/kvm/arm.c  |   55 +++---
 arch/arm/kvm/vgic.c |  117 +++
 3 files changed, 170 insertions(+), 10 deletions(-)

diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h
index b3133c4..9ff0e52 100644
--- a/arch/arm/include/asm/kvm_vgic.h
+++ b/arch/arm/include/asm/kvm_vgic.h
@@ -146,6 +146,8 @@ struct kvm_exit_mmio;
 int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr);
 void kvm_vgic_sync_to_cpu(struct kvm_vcpu *vcpu);
 void kvm_vgic_sync_from_cpu(struct kvm_vcpu *vcpu);
+int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
+   bool level);
 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
 bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
  struct kvm_exit_mmio *mmio);
@@ -181,6 +183,12 @@ static inline int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 static inline void kvm_vgic_sync_to_cpu(struct kvm_vcpu *vcpu) {}
 static inline void kvm_vgic_sync_from_cpu(struct kvm_vcpu *vcpu) {}
 
+static inline int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid,
+ unsigned int irq_num, bool level)
+{
+   return 0;
+}
+
 static inline int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
 {
return 0;
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 4c2b057..8dd949c 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -788,20 +788,49 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct 
kvm_irq_level *irq_level)
 
trace_kvm_irq_line(irq_type, vcpu_idx, irq_num, irq_level-level);
 
-   if (irq_type != KVM_ARM_IRQ_TYPE_CPU)
-   return -EINVAL;
+   switch (irq_type) {
+   case KVM_ARM_IRQ_TYPE_CPU:
+   if (irqchip_in_kernel(kvm))
+   return -ENXIO;
 
-   if (vcpu_idx = nrcpus)
-   return -EINVAL;
+   if (vcpu_idx = nrcpus)
+   return -EINVAL;
 
-   vcpu = kvm_get_vcpu(kvm, vcpu_idx);
-   if (!vcpu)
-   return -EINVAL;
+   vcpu = kvm_get_vcpu(kvm, vcpu_idx);
+   if (!vcpu)
+   return -EINVAL;
 
-   if (irq_num  KVM_ARM_IRQ_CPU_FIQ)
-   return -EINVAL;
+   if (irq_num  KVM_ARM_IRQ_CPU_FIQ)
+   return -EINVAL;
+
+   return vcpu_interrupt_line(vcpu, irq_num, level);
+   case KVM_ARM_IRQ_TYPE_PPI:
+   if (!irqchip_in_kernel(kvm))
+   return -ENXIO;
+
+   if (vcpu_idx = nrcpus)
+   return -EINVAL;
+
+   vcpu = kvm_get_vcpu(kvm, vcpu_idx);
+   if (!vcpu)
+   return -EINVAL;
+
+   if (irq_num  VGIC_NR_SGIS || irq_num = VGIC_NR_PRIVATE_IRQS)
+   return -EINVAL;
 
-   return vcpu_interrupt_line(vcpu, irq_num, level);
+   return kvm_vgic_inject_irq(kvm, vcpu-vcpu_id, irq_num, level);
+   case KVM_ARM_IRQ_TYPE_SPI:
+   if (!irqchip_in_kernel(kvm))
+   return -ENXIO;
+
+   if (irq_num  VGIC_NR_PRIVATE_IRQS ||
+   irq_num  KVM_ARM_IRQ_GIC_MAX)
+   return -EINVAL;
+
+   return kvm_vgic_inject_irq(kvm, 0, irq_num, level);
+   }
+
+   return -EINVAL;
 }
 
 long kvm_arch_vcpu_ioctl(struct file *filp,
@@ -880,6 +909,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
void __user *argp = (void __user *)arg;
 
switch (ioctl) {
+   case KVM_CREATE_IRQCHIP: {
+   if (vgic_present)
+   return kvm_vgic_create(kvm);
+   else
+   return -ENXIO;
+   }
case KVM_SET_DEVICE_ADDRESS: {
struct kvm_device_address dev_addr;
 
diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
index 49e8b27..65e5282 100644
--- a/arch/arm/kvm/vgic.c
+++ b/arch/arm/kvm/vgic.c
@@ -73,6 +73,7 @@
 
 static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
 static void vgic_update_state(struct kvm *kvm);
+static void vgic_kick_vcpus(struct kvm *kvm);
 static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
 
 static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
@@ -696,6 +697,9 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run 
*run,
kvm_prepare_mmio(run, mmio);
kvm_handle_mmio_return(vcpu, run);
 
+   if (updated_state)
+   vgic_kick_vcpus(vcpu-kvm);
+
return true;
 }
 
@@ -1092,6 +1096,119 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
return 

[PATCH v5 10/12] ARM: KVM: VGIC control interface world switch

2013-01-08 Thread Christoffer Dall
From: Marc Zyngier marc.zyng...@arm.com

Enable the VGIC control interface to be save-restored on world switch.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
Signed-off-by: Christoffer Dall c.d...@virtualopensystems.com
---
 arch/arm/kernel/asm-offsets.c  |   12 ++
 arch/arm/kvm/interrupts_head.S |   74 
 2 files changed, 86 insertions(+)

diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index c8b3272..17cea2e 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -169,6 +169,18 @@ int main(void)
   DEFINE(VCPU_HxFAR,   offsetof(struct kvm_vcpu, arch.hxfar));
   DEFINE(VCPU_HPFAR,   offsetof(struct kvm_vcpu, arch.hpfar));
   DEFINE(VCPU_HYP_PC,  offsetof(struct kvm_vcpu, arch.hyp_pc));
+#ifdef CONFIG_KVM_ARM_VGIC
+  DEFINE(VCPU_VGIC_CPU,offsetof(struct kvm_vcpu, 
arch.vgic_cpu));
+  DEFINE(VGIC_CPU_HCR, offsetof(struct vgic_cpu, vgic_hcr));
+  DEFINE(VGIC_CPU_VMCR,offsetof(struct vgic_cpu, vgic_vmcr));
+  DEFINE(VGIC_CPU_MISR,offsetof(struct vgic_cpu, vgic_misr));
+  DEFINE(VGIC_CPU_EISR,offsetof(struct vgic_cpu, vgic_eisr));
+  DEFINE(VGIC_CPU_ELRSR,   offsetof(struct vgic_cpu, vgic_elrsr));
+  DEFINE(VGIC_CPU_APR, offsetof(struct vgic_cpu, vgic_apr));
+  DEFINE(VGIC_CPU_LR,  offsetof(struct vgic_cpu, vgic_lr));
+  DEFINE(VGIC_CPU_NR_LR,   offsetof(struct vgic_cpu, nr_lr));
+  DEFINE(KVM_VGIC_VCTRL,   offsetof(struct kvm, arch.vgic.vctrl_base));
+#endif
   DEFINE(KVM_VTTBR,offsetof(struct kvm, arch.vttbr));
 #endif
   return 0; 
diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
index f59a580..b4276ed 100644
--- a/arch/arm/kvm/interrupts_head.S
+++ b/arch/arm/kvm/interrupts_head.S
@@ -1,3 +1,5 @@
+#include asm/hardware/gic.h
+
 #define VCPU_USR_REG(_reg_nr)  (VCPU_USR_REGS + (_reg_nr * 4))
 #define VCPU_USR_SP(VCPU_USR_REG(13))
 #define VCPU_USR_LR(VCPU_USR_REG(14))
@@ -371,6 +373,49 @@ vcpu   .reqr0  @ vcpu pointer always 
in r0
  * Assumes vcpu pointer in vcpu reg
  */
 .macro save_vgic_state
+#ifdef CONFIG_KVM_ARM_VGIC
+   /* Get VGIC VCTRL base into r2 */
+   ldr r2, [vcpu, #VCPU_KVM]
+   ldr r2, [r2, #KVM_VGIC_VCTRL]
+   cmp r2, #0
+   beq 2f
+
+   /* Compute the address of struct vgic_cpu */
+   add r11, vcpu, #VCPU_VGIC_CPU
+
+   /* Save all interesting registers */
+   ldr r3, [r2, #GICH_HCR]
+   ldr r4, [r2, #GICH_VMCR]
+   ldr r5, [r2, #GICH_MISR]
+   ldr r6, [r2, #GICH_EISR0]
+   ldr r7, [r2, #GICH_EISR1]
+   ldr r8, [r2, #GICH_ELRSR0]
+   ldr r9, [r2, #GICH_ELRSR1]
+   ldr r10, [r2, #GICH_APR]
+
+   str r3, [r11, #VGIC_CPU_HCR]
+   str r4, [r11, #VGIC_CPU_VMCR]
+   str r5, [r11, #VGIC_CPU_MISR]
+   str r6, [r11, #VGIC_CPU_EISR]
+   str r7, [r11, #(VGIC_CPU_EISR + 4)]
+   str r8, [r11, #VGIC_CPU_ELRSR]
+   str r9, [r11, #(VGIC_CPU_ELRSR + 4)]
+   str r10, [r11, #VGIC_CPU_APR]
+
+   /* Clear GICH_HCR */
+   mov r5, #0
+   str r5, [r2, #GICH_HCR]
+
+   /* Save list registers */
+   add r2, r2, #GICH_LR0
+   add r3, r11, #VGIC_CPU_LR
+   ldr r4, [r11, #VGIC_CPU_NR_LR]
+1: ldr r6, [r2], #4
+   str r6, [r3], #4
+   subsr4, r4, #1
+   bne 1b
+2:
+#endif
 .endm
 
 /*
@@ -379,6 +424,35 @@ vcpu   .reqr0  @ vcpu pointer always 
in r0
  * Assumes vcpu pointer in vcpu reg
  */
 .macro restore_vgic_state
+#ifdef CONFIG_KVM_ARM_VGIC
+   /* Get VGIC VCTRL base into r2 */
+   ldr r2, [vcpu, #VCPU_KVM]
+   ldr r2, [r2, #KVM_VGIC_VCTRL]
+   cmp r2, #0
+   beq 2f
+
+   /* Compute the address of struct vgic_cpu */
+   add r11, vcpu, #VCPU_VGIC_CPU
+
+   /* We only restore a minimal set of registers */
+   ldr r3, [r11, #VGIC_CPU_HCR]
+   ldr r4, [r11, #VGIC_CPU_VMCR]
+   ldr r8, [r11, #VGIC_CPU_APR]
+
+   str r3, [r2, #GICH_HCR]
+   str r4, [r2, #GICH_VMCR]
+   str r8, [r2, #GICH_APR]
+
+   /* Restore list registers */
+   add r2, r2, #GICH_LR0
+   add r3, r11, #VGIC_CPU_LR
+   ldr r4, [r11, #VGIC_CPU_NR_LR]
+1: ldr r6, [r3], #4
+   str r6, [r2], #4
+   subsr4, r4, #1
+   bne 1b
+2:
+#endif
 .endm
 
 .equ vmentry,  0



[PATCH v5 11/12] ARM: KVM: VGIC initialisation code

2013-01-08 Thread Christoffer Dall
From: Marc Zyngier marc.zyng...@arm.com

Add the init code for the hypervisor, the virtual machine, and
the virtual CPUs.

An interrupt handler is also wired up to handle the VGIC maintenance
interrupts, which are used to deal with level-triggered interrupts and LR
underflows.

A CPU hotplug notifier is registered to disable/enable the interrupt
as requested.
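
As a rough, illustrative sketch (not taken verbatim from this patch), such a
notifier can be as simple as enabling/disabling the maintenance PPI on the
CPU that is coming up or going down; vgic_maint_irq below is the variable
this patch fills in from the device tree:

static int vgic_cpu_notify(struct notifier_block *self,
			   unsigned long action, void *cpu)
{
	switch (action) {
	case CPU_STARTING:
	case CPU_STARTING_FROZEN:
		/* re-enable the per-cpu maintenance interrupt */
		enable_percpu_irq(vgic_maint_irq, 0);
		break;
	case CPU_DYING:
	case CPU_DYING_FROZEN:
		disable_percpu_irq(vgic_maint_irq);
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block vgic_cpu_nb = {
	.notifier_call = vgic_cpu_notify,
};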

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
Signed-off-by: Christoffer Dall c.d...@virtualopensystems.com
---
 arch/arm/include/asm/kvm_vgic.h |   11 ++
 arch/arm/kvm/arm.c  |   15 +++
 arch/arm/kvm/vgic.c |  223 +++
 3 files changed, 249 insertions(+)

diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h
index 9ff0e52..5e81e28 100644
--- a/arch/arm/include/asm/kvm_vgic.h
+++ b/arch/arm/include/asm/kvm_vgic.h
@@ -71,6 +71,7 @@ struct vgic_bytemap {
 struct vgic_dist {
 #ifdef CONFIG_KVM_ARM_VGIC
spinlock_t  lock;
+   boolready;
 
/* Virtual control interface mapping */
void __iomem*vctrl_base;
@@ -144,6 +145,10 @@ struct kvm_exit_mmio;
 
 #ifdef CONFIG_KVM_ARM_VGIC
 int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr);
+int kvm_vgic_hyp_init(void);
+int kvm_vgic_init(struct kvm *kvm);
+int kvm_vgic_create(struct kvm *kvm);
+int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu);
 void kvm_vgic_sync_to_cpu(struct kvm_vcpu *vcpu);
 void kvm_vgic_sync_from_cpu(struct kvm_vcpu *vcpu);
 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
@@ -153,6 +158,7 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run 
*run,
  struct kvm_exit_mmio *mmio);
 
 #define irqchip_in_kernel(k)   (!!((k)->arch.vgic.vctrl_base))
+#define vgic_initialized(k)    ((k)->arch.vgic.ready)
 
 #else
 static inline int kvm_vgic_hyp_init(void)
@@ -204,6 +210,11 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
 {
return 0;
 }
+
+static inline bool vgic_initialized(struct kvm *kvm)
+{
+   return true;
+}
 #endif
 
 #endif
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 8dd949c..ac72a8f 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -659,6 +659,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
if (unlikely(vcpu->arch.target < 0))
return -ENOEXEC;
 
+   /*
+* Initialize the VGIC before running a vcpu the first time on
+* this VM.
+*/
+   if (irqchip_in_kernel(vcpu->kvm) &&
+   unlikely(!vgic_initialized(vcpu->kvm))) {
+   ret = kvm_vgic_init(vcpu->kvm);
+   if (ret)
+   return ret;
+   }
+
if (run->exit_reason == KVM_EXIT_MMIO) {
ret = kvm_handle_mmio_return(vcpu, vcpu->run);
if (ret)
@@ -1066,6 +1077,10 @@ static int init_hyp_mode(void)
if (err)
goto out_free_vfp;
 
+#ifdef CONFIG_KVM_ARM_VGIC
+   vgic_present = true;
+#endif
+
kvm_info("Hyp mode initialized successfully\n");
return 0;
 out_free_vfp:
diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
index 65e5282..083639b 100644
--- a/arch/arm/kvm/vgic.c
+++ b/arch/arm/kvm/vgic.c
@@ -16,11 +16,19 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
+#include linux/cpu.h
 #include linux/kvm.h
 #include linux/kvm_host.h
 #include linux/interrupt.h
 #include linux/io.h
+#include linux/of.h
+#include linux/of_address.h
+#include linux/of_irq.h
+
 #include asm/kvm_emulate.h
+#include asm/hardware/gic.h
+#include asm/kvm_arm.h
+#include asm/kvm_mmu.h
 
 /*
  * How the whole thing works (courtesy of Christoffer Dall):
@@ -62,6 +70,14 @@
 #define VGIC_ADDR_UNDEF(-1)
 #define IS_VGIC_ADDR_UNDEF(_x)  ((_x) == VGIC_ADDR_UNDEF)
 
+/* Physical address of vgic virtual cpu interface */
+static phys_addr_t vgic_vcpu_base;
+
+/* Virtual control interface base address */
+static void __iomem *vgic_vctrl_base;
+
+static struct device_node *vgic_node;
+
 #define ACCESS_READ_VALUE  (1  0)
 #define ACCESS_READ_RAZ(0  0)
 #define ACCESS_READ_MASK(x)((x)  (1  0))
@@ -75,6 +91,9 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
 static void vgic_update_state(struct kvm *kvm);
 static void vgic_kick_vcpus(struct kvm *kvm);
 static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
+static u32 vgic_nr_lr;
+
+static unsigned int vgic_maint_irq;
 
 static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
int cpuid, u32 offset)
@@ -1209,6 +1228,210 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, 
unsigned int irq_num,
return 0;
 }
 
+static irqreturn_t vgic_maintenance_handler(int irq, void *data)
+{
+   /*
+* We cannot rely on the vgic maintenance interrupt to be
+* delivered synchronously. This means we can only use it to
+* exit the 

[PATCH v5 12/12] ARM: KVM: Add VGIC configuration option

2013-01-08 Thread Christoffer Dall
From: Marc Zyngier marc.zyng...@arm.com

It is now possible to select the VGIC configuration option.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
Signed-off-by: Christoffer Dall c.d...@virtualopensystems.com
---
 arch/arm/kvm/Kconfig |8 
 1 file changed, 8 insertions(+)

diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 05227cb..d32e33f 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -51,6 +51,14 @@ config KVM_ARM_MAX_VCPUS
  large, so only choose a reasonable number that you expect to
  actually use.
 
+config KVM_ARM_VGIC
+bool KVM support for Virtual GIC
+   depends on KVM_ARM_HOST  OF
+   select HAVE_KVM_IRQCHIP
+   default y
+   ---help---
+ Adds support for a hardware assisted, in-kernel GIC emulation.
+
 source drivers/virtio/Kconfig
 
 endif # VIRTUALIZATION



[PATCH v5 0/4] KVM/ARM Architected Timers support

2013-01-08 Thread Christoffer Dall
The following series implements support for the architected generic
timers for KVM/ARM.

This patch series can also be pulled from:
git://github.com/virtualopensystems/linux-kvm-arm.git
branch: kvm-arm-v15-vgic-timers

Changes since v1-v4:
 - Get virtual IRQ number from DT
 - Simplify access to cntvoff and cntv_cval
 - Remove extraneous bit clearing
 - Abstract timer arming/disarming to improve code readability
 - Context switch CNTKCTL across world-switches
 - Add CPU hotplug notifier

---

Marc Zyngier (4):
  ARM: arch_timers: switch to physical timers if HYP mode is available
  ARM: KVM: arch_timers: Add guest timer core support
  ARM: KVM: arch_timers: Add timer world switch
  ARM: KVM: arch_timers: Wire the init code and config option


 arch/arm/include/asm/kvm_arch_timer.h |   85 +++
 arch/arm/include/asm/kvm_asm.h|3 
 arch/arm/include/asm/kvm_host.h   |5 +
 arch/arm/kernel/arch_timer.c  |7 +
 arch/arm/kernel/asm-offsets.c |6 +
 arch/arm/kvm/Kconfig  |8 +
 arch/arm/kvm/Makefile |1 
 arch/arm/kvm/arch_timer.c |  257 +
 arch/arm/kvm/arm.c|   14 ++
 arch/arm/kvm/coproc.c |4 +
 arch/arm/kvm/interrupts.S |2 
 arch/arm/kvm/interrupts_head.S|   93 
 arch/arm/kvm/vgic.c   |1 
 13 files changed, 484 insertions(+), 2 deletions(-)
 create mode 100644 arch/arm/include/asm/kvm_arch_timer.h
 create mode 100644 arch/arm/kvm/arch_timer.c

-- 


[PATCH v5 1/4] ARM: arch_timers: switch to physical timers if HYP mode is available

2013-01-08 Thread Christoffer Dall
From: Marc Zyngier marc.zyng...@arm.com

If we're booted in HYP mode, it is possible that we'll run some
kind of virtualized environment. In this case, it is better to
switch to the physical timers, and leave the virtual timers to
guests.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm/kernel/arch_timer.c |7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/arm/kernel/arch_timer.c b/arch/arm/kernel/arch_timer.c
index c8ef207..8adcd04 100644
--- a/arch/arm/kernel/arch_timer.c
+++ b/arch/arm/kernel/arch_timer.c
@@ -26,6 +26,7 @@
 #include asm/arch_timer.h
 #include asm/system_info.h
 #include asm/sched_clock.h
+#include asm/virt.h
 
 static unsigned long arch_timer_rate;
 
@@ -489,10 +490,14 @@ int __init arch_timer_of_register(void)
arch_timer_ppi[i] = irq_of_parse_and_map(np, i);
 
/*
+* If HYP mode is available, we know that the physical timer
+* has been configured to be accessible from PL1. Use it, so
+* that a guest can use the virtual timer instead.
+*
 * If no interrupt provided for virtual timer, we'll have to
 * stick to the physical timer. It'd better be accessible...
 */
-   if (!arch_timer_ppi[VIRT_PPI]) {
+   if (is_hyp_mode_available() || !arch_timer_ppi[VIRT_PPI]) {
arch_timer_use_virtual = false;
 
if (!arch_timer_ppi[PHYS_SECURE_PPI] ||



[PATCH v5 2/4] ARM: KVM: arch_timers: Add guest timer core support

2013-01-08 Thread Christoffer Dall
From: Marc Zyngier marc.zyng...@arm.com

Add some of the architected timer related infrastructure, and support timer
interrupt injection, which can happen as a result of three possible
events:

- The virtual timer interrupt has fired while we were still
  executing the guest
- The timer interrupt hasn't fired, but it expired while we
  were doing the world switch
- A hrtimer we programmed earlier has fired
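
Based only on the fields declared in struct arch_timer_cpu below, the third
case can be pictured roughly like this (a sketch, not the actual arch_timer.c
implementation; the function names are illustrative):

static void kvm_timer_inject_irq_work(struct work_struct *work)
{
	struct kvm_vcpu *vcpu;

	vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired);
	vcpu->arch.timer_cpu.armed = false;
	/* inject the virtual timer interrupt through the VGIC */
	kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
			    vcpu->arch.timer_cpu.irq->irq, true);
}

static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
{
	struct arch_timer_cpu *timer;

	timer = container_of(hrt, struct arch_timer_cpu, timer);
	/* defer the injection to process context via the 'expired' work */
	schedule_work(&timer->expired);
	return HRTIMER_NORESTART;
}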

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
Signed-off-by: Christoffer Dall c.d...@virtualopensystems.com
---
 arch/arm/include/asm/kvm_arch_timer.h |   85 +++
 arch/arm/include/asm/kvm_host.h   |5 +
 arch/arm/kvm/arch_timer.c |  257 +
 arch/arm/kvm/interrupts.S |2 
 arch/arm/kvm/interrupts_head.S|   31 
 5 files changed, 380 insertions(+)
 create mode 100644 arch/arm/include/asm/kvm_arch_timer.h
 create mode 100644 arch/arm/kvm/arch_timer.c

diff --git a/arch/arm/include/asm/kvm_arch_timer.h 
b/arch/arm/include/asm/kvm_arch_timer.h
new file mode 100644
index 000..aed1c42
--- /dev/null
+++ b/arch/arm/include/asm/kvm_arch_timer.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Marc Zyngier marc.zyng...@arm.com
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __ASM_ARM_KVM_ARCH_TIMER_H
+#define __ASM_ARM_KVM_ARCH_TIMER_H
+
+#include linux/clocksource.h
+#include linux/hrtimer.h
+#include linux/workqueue.h
+
+struct arch_timer_kvm {
+#ifdef CONFIG_KVM_ARM_TIMER
+   /* Is the timer enabled */
+   boolenabled;
+
+   /* Virtual offset */
+   cycle_t cntvoff;
+#endif
+};
+
+struct arch_timer_cpu {
+#ifdef CONFIG_KVM_ARM_TIMER
+   /* Registers: control register, timer value */
+   u32 cntv_ctl;   /* Saved/restored */
+   cycle_t cntv_cval;  /* Saved/restored */
+
+   /*
+* Anything that is not used directly from assembly code goes
+* here.
+*/
+
+   /* Background timer used when the guest is not running */
+   struct hrtimer  timer;
+
+   /* Work queued with the above timer expires */
+   struct work_struct  expired;
+
+   /* Background timer active */
+   boolarmed;
+
+   /* Timer IRQ */
+   const struct kvm_irq_level  *irq;
+#endif
+};
+
+#ifdef CONFIG_KVM_ARM_TIMER
+int kvm_timer_hyp_init(void);
+int kvm_timer_init(struct kvm *kvm);
+void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
+void kvm_timer_sync_to_cpu(struct kvm_vcpu *vcpu);
+void kvm_timer_sync_from_cpu(struct kvm_vcpu *vcpu);
+void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu);
+#else
+static inline int kvm_timer_hyp_init(void)
+{
+   return 0;
+};
+
+static inline int kvm_timer_init(struct kvm *kvm)
+{
+   return 0;
+}
+
+static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {}
+static inline void kvm_timer_sync_to_cpu(struct kvm_vcpu *vcpu) {}
+static inline void kvm_timer_sync_from_cpu(struct kvm_vcpu *vcpu) {}
+static inline void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) {}
+#endif
+
+#endif
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 149d62b..334b81d 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -23,6 +23,7 @@
 #include asm/kvm_asm.h
 #include asm/fpstate.h
 #include asm/kvm_decode.h
+#include asm/kvm_arch_timer.h
 
 #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS
 #define KVM_USER_MEM_SLOTS 32
@@ -49,6 +50,9 @@ struct kvm_arch {
/* VTTBR value associated with below pgd and vmid */
u64vttbr;
 
+   /* Timer */
+   struct arch_timer_kvm   timer;
+
/*
 * Anything that is not used directly from assembly code goes
 * here.
@@ -99,6 +103,7 @@ struct kvm_vcpu_arch {
 
/* VGIC state */
struct vgic_cpu vgic_cpu;
+   struct arch_timer_cpu timer_cpu;
 
/*
 * Anything that is not used directly from assembly code goes
diff --git a/arch/arm/kvm/arch_timer.c b/arch/arm/kvm/arch_timer.c
new file mode 100644
index 000..6cb9aa3
--- /dev/null
+++ b/arch/arm/kvm/arch_timer.c
@@ -0,0 +1,257 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Marc Zyngier marc.zyng...@arm.com
+ *
+ * This program is free 

[PATCH v5 3/4] ARM: KVM: arch_timers: Add timer world switch

2013-01-08 Thread Christoffer Dall
From: Marc Zyngier marc.zyng...@arm.com

Do the necessary save/restore dance for the timers in the world
switch code. In the process, allow the guest to read the physical
counter, which is useful for its own clock_event_device.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
Signed-off-by: Christoffer Dall c.d...@virtualopensystems.com
---
 arch/arm/include/asm/kvm_asm.h |3 +-
 arch/arm/kernel/asm-offsets.c  |6 
 arch/arm/kvm/arm.c |3 ++
 arch/arm/kvm/coproc.c  |4 +++
 arch/arm/kvm/interrupts_head.S |   62 
 5 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 58d787b..8a60ed8 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -45,7 +45,8 @@
 #define c13_TID_URW23  /* Thread ID, User R/W */
 #define c13_TID_URO24  /* Thread ID, User R/O */
 #define c13_TID_PRIV   25  /* Thread ID, Privileged */
-#define NR_CP15_REGS   26  /* Number of regs (incl. invalid) */
+#define c14_CNTKCTL26  /* Timer Control Register (PL1) */
+#define NR_CP15_REGS   27  /* Number of regs (incl. invalid) */
 
 #define ARM_EXCEPTION_RESET  0
 #define ARM_EXCEPTION_UNDEFINED   1
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 17cea2e..5ce738b 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -179,6 +179,12 @@ int main(void)
   DEFINE(VGIC_CPU_APR, offsetof(struct vgic_cpu, vgic_apr));
   DEFINE(VGIC_CPU_LR,  offsetof(struct vgic_cpu, vgic_lr));
   DEFINE(VGIC_CPU_NR_LR,   offsetof(struct vgic_cpu, nr_lr));
+#ifdef CONFIG_KVM_ARM_TIMER
+  DEFINE(VCPU_TIMER_CNTV_CTL,  offsetof(struct kvm_vcpu, 
arch.timer_cpu.cntv_ctl));
+  DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, 
arch.timer_cpu.cntv_cval));
+  DEFINE(KVM_TIMER_CNTVOFF,offsetof(struct kvm, arch.timer.cntvoff));
+  DEFINE(KVM_TIMER_ENABLED,offsetof(struct kvm, arch.timer.enabled));
+#endif
   DEFINE(KVM_VGIC_VCTRL,   offsetof(struct kvm, arch.vgic.vctrl_base));
 #endif
   DEFINE(KVM_VTTBR,offsetof(struct kvm, arch.vttbr));
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index ac72a8f..22f39d6 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -690,6 +690,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
update_vttbr(vcpu->kvm);
 
kvm_vgic_sync_to_cpu(vcpu);
+   kvm_timer_sync_to_cpu(vcpu);
 
local_irq_disable();
 
@@ -703,6 +704,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
 
if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) {
local_irq_enable();
+   kvm_timer_sync_from_cpu(vcpu);
kvm_vgic_sync_from_cpu(vcpu);
continue;
}
@@ -742,6 +744,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
 * Back from guest
 */
 
+   kvm_timer_sync_from_cpu(vcpu);
kvm_vgic_sync_from_cpu(vcpu);
 
ret = handle_exit(vcpu, run, ret);
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index d782638..4ea9a98 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -222,6 +222,10 @@ static const struct coproc_reg cp15_regs[] = {
NULL, reset_unknown, c13_TID_URO },
{ CRn(13), CRm( 0), Op1( 0), Op2( 4), is32,
NULL, reset_unknown, c13_TID_PRIV },
+
+   /* CNTKCTL: swapped by interrupt.S. */
+   { CRn(14), CRm( 1), Op1( 0), Op2( 0), is32,
+   NULL, reset_val, c14_CNTKCTL, 0x },
 };
 
 /* Target specific emulation tables */
diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
index dde5f8d..57cfa84 100644
--- a/arch/arm/kvm/interrupts_head.S
+++ b/arch/arm/kvm/interrupts_head.S
@@ -301,6 +301,14 @@ vcpu   .reqr0  @ vcpu pointer always 
in r0
str r11, [vcpu, #CP15_OFFSET(c6_IFAR)]
str r12, [vcpu, #CP15_OFFSET(c12_VBAR)]
.endif
+
+   mrc p15, 0, r2, c14, c1, 0  @ CNTKCTL
+
+   .if \store_to_vcpu == 0
+   push{r2}
+   .else
+   str r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)]
+   .endif
 .endm
 
 /*
@@ -312,6 +320,14 @@ vcpu   .reqr0  @ vcpu pointer always 
in r0
  */
 .macro write_cp15_state read_from_vcpu
.if \read_from_vcpu == 0
+   pop {r2}
+   .else
+   ldr r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)]
+   .endif
+
+   mcr p15, 0, r2, c14, c1, 0  @ CNTKCTL
+
+   .if \read_from_vcpu == 0
pop {r2-r12}
.else
ldr r2, [vcpu, #CP15_OFFSET(c13_CID)]
@@ -463,8 +479,29 

[PATCH v5 4/4] ARM: KVM: arch_timers: Wire the init code and config option

2013-01-08 Thread Christoffer Dall
From: Marc Zyngier marc.zyng...@arm.com

It is now possible to select CONFIG_KVM_ARM_TIMER to enable the
KVM architected timer support.

Signed-off-by: Marc Zyngier marc.zyng...@arm.com
Signed-off-by: Christoffer Dall c.d...@virtualopensystems.com
---
 arch/arm/kvm/Kconfig  |8 
 arch/arm/kvm/Makefile |1 +
 arch/arm/kvm/arm.c|   11 +++
 arch/arm/kvm/vgic.c   |1 +
 4 files changed, 21 insertions(+)

diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index d32e33f..739500b 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -59,6 +59,14 @@ config KVM_ARM_VGIC
---help---
  Adds support for a hardware assisted, in-kernel GIC emulation.
 
+config KVM_ARM_TIMER
+bool KVM support for Architected Timers
+   depends on KVM_ARM_VGIC  ARM_ARCH_TIMER
+   select HAVE_KVM_IRQCHIP
+   default y
+   ---help---
+ Adds support for the Architected Timers in virtual machines
+
 source drivers/virtio/Kconfig
 
 endif # VIRTUALIZATION
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index 3c6620c..43c4cad 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -20,3 +20,4 @@ obj-y += kvm-arm.o init.o interrupts.o
 obj-y += arm.o guest.o mmu.o emulate.o reset.o
 obj-y += coproc.o coproc_a15.o mmio.o decode.o
 obj-$(CONFIG_KVM_ARM_VGIC) += vgic.o
+obj-$(CONFIG_KVM_ARM_TIMER) += arch_timer.o
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 22f39d6..38ad78a 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -287,6 +287,7 @@ int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 {
kvm_mmu_free_memory_caches(vcpu);
+   kvm_timer_vcpu_terminate(vcpu);
kmem_cache_free(kvm_vcpu_cache, vcpu);
 }
 
@@ -328,6 +329,9 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
if (ret)
return ret;
 
+   /* Set up the timer */
+   kvm_timer_vcpu_init(vcpu);
+
return 0;
 }
 
@@ -1084,6 +1088,13 @@ static int init_hyp_mode(void)
vgic_present = true;
 #endif
 
+   /*
+* Init HYP architected timer support
+*/
+   err = kvm_timer_hyp_init();
+   if (err)
+   goto out_free_mappings;
+
kvm_info("Hyp mode initialized successfully\n");
return 0;
 out_free_vfp:
diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
index 083639b..7eb94fa 100644
--- a/arch/arm/kvm/vgic.c
+++ b/arch/arm/kvm/vgic.c
@@ -1405,6 +1405,7 @@ int kvm_vgic_init(struct kvm *kvm)
for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4)
vgic_set_target_reg(kvm, 0, i);
 
+   kvm_timer_init(kvm);
kvm->arch.vgic.ready = true;
 out:
mutex_unlock(&kvm->lock);



[PATCH v5 10/14] KVM: ARM: Demux CCSIDR in the userspace API

2013-01-08 Thread Christoffer Dall
The Cache Size Selection Register (CSSELR) selects the current Cache
Size ID Register (CCSIDR).  You write which cache you are interested
in to CSSELR, and read the information out of CCSIDR.

Which cache numbers are valid is known by reading the Cache Level ID
Register (CLIDR).

To export this state to userspace, we add a KVM_REG_ARM_DEMUX
numberspace (17), which uses 8 bits to represent which register is
being demultiplexed (0 for CCSIDR), and the lower 8 bits to represent
this demultiplexing (in our case, the CSSELR value, which is 4 bits).
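
As a concrete (hypothetical) example of the encoding, user space can read the
CCSIDR selected by a given CSSELR value through KVM_GET_ONE_REG roughly as
follows; KVM_REG_ARM and KVM_REG_SIZE_U32 come from the generic uapi headers
and are assumed here, the DEMUX constants are the ones added below:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch only: read the CCSIDR selected by 'csselr' from a vcpu fd. */
static int read_ccsidr(int vcpu_fd, unsigned int csselr, __u32 *ccsidr)
{
	struct kvm_one_reg reg = {
		.id = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX |
		      KVM_REG_ARM_DEMUX_ID_CCSIDR |
		      (csselr << KVM_REG_ARM_DEMUX_VAL_SHIFT),
		.addr = (__u64)(unsigned long)ccsidr,
	};

	return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
}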

Reviewed-by: Marcelo Tosatti mtosa...@redhat.com
Signed-off-by: Rusty Russell rusty.russ...@linaro.org
---
 Documentation/virtual/kvm/api.txt |2 
 arch/arm/include/uapi/asm/kvm.h   |9 ++
 arch/arm/kvm/coproc.c |  164 -
 3 files changed, 172 insertions(+), 3 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index 0e22874..94f17a3 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1805,6 +1805,8 @@ ARM 32-bit CP15 registers have the following id bit 
patterns:
 ARM 64-bit CP15 registers have the following id bit patterns:
   0x4003  000F zero:1 zero:4 crm:4 opc1:4 zero:3
 
+ARM CCSIDR registers are demultiplexed by CSSELR value:
+  0x4002  0011 00 <csselr:8>
 
 4.69 KVM_GET_ONE_REG
 
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 4cf6d8f..aa2684c 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -104,6 +104,15 @@ struct kvm_arch_memory_slot {
 #define KVM_REG_ARM_CORE   (0x0010  KVM_REG_ARM_COPROC_SHIFT)
 #define KVM_REG_ARM_CORE_REG(name) (offsetof(struct kvm_regs, name) / 4)
 
+/* Some registers need more space to represent values. */
+#define KVM_REG_ARM_DEMUX  (0x0011 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM_DEMUX_ID_MASK  0xFF00
+#define KVM_REG_ARM_DEMUX_ID_SHIFT 8
+#define KVM_REG_ARM_DEMUX_ID_CCSIDR    (0x00 << KVM_REG_ARM_DEMUX_ID_SHIFT)
+#define KVM_REG_ARM_DEMUX_VAL_MASK 0x00FF
+#define KVM_REG_ARM_DEMUX_VAL_SHIFT    0
+
+
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT 24
 #define KVM_ARM_IRQ_TYPE_MASK  0xff
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 95a0f5e..1827b64 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -35,6 +35,12 @@
  * Co-processor emulation
  */
 
+/* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */
+static u32 cache_levels;
+
+/* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */
+#define CSSELR_MAX 12
+
 int kvm_handle_cp10_id(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
kvm_inject_undefined(vcpu);
@@ -548,11 +554,113 @@ static int set_invariant_cp15(u64 id, void __user *uaddr)
return 0;
 }
 
+static bool is_valid_cache(u32 val)
+{
+   u32 level, ctype;
+
+   if (val >= CSSELR_MAX)
+   return -ENOENT;
+
+   /* Bottom bit is Instruction or Data bit.  Next 3 bits are level. */
+   level = (val >> 1);
+   ctype = (cache_levels >> (level * 3)) & 7;
+
+   switch (ctype) {
+   case 0: /* No cache */
+   return false;
+   case 1: /* Instruction cache only */
+   return (val & 1);
+   case 2: /* Data cache only */
+   case 4: /* Unified cache */
+   return !(val & 1);
+   case 3: /* Separate instruction and data caches */
+   return true;
+   default: /* Reserved: we can't know instruction or data. */
+   return false;
+   }
+}
+
+/* Which cache CCSIDR represents depends on CSSELR value. */
+static u32 get_ccsidr(u32 csselr)
+{
+   u32 ccsidr;
+
+   /* Make sure noone else changes CSSELR during this! */
+   local_irq_disable();
+   /* Put value into CSSELR */
+   asm volatile("mcr p15, 2, %0, c0, c0, 0" : : "r" (csselr));
+   isb();
+   /* Read result out of CCSIDR */
+   asm volatile("mrc p15, 1, %0, c0, c0, 0" : "=r" (ccsidr));
+   local_irq_enable();
+
+   return ccsidr;
+}
+
+static int demux_c15_get(u64 id, void __user *uaddr)
+{
+   u32 val;
+   u32 __user *uval = uaddr;
+
+   /* Fail if we have unknown bits set. */
+   if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK
+  | ((1 << KVM_REG_ARM_COPROC_SHIFT)-1)))
+   return -ENOENT;
+
+   switch (id & KVM_REG_ARM_DEMUX_ID_MASK) {
+   case KVM_REG_ARM_DEMUX_ID_CCSIDR:
+   if (KVM_REG_SIZE(id) != 4)
+   return -ENOENT;
+   val = (id & KVM_REG_ARM_DEMUX_VAL_MASK)
+      >> KVM_REG_ARM_DEMUX_VAL_SHIFT;
+   if (!is_valid_cache(val))
+   return -ENOENT;
+
+   return 

[PATCH v5 05/12] ARM: KVM: VGIC accept vcpu and dist base addresses from user space

2013-01-08 Thread Christoffer Dall
User space defines the model emulated for a guest and should therefore
decide which addresses are used, both for the virtual CPU interface that is
directly mapped into the guest physical address space and for the emulated
distributor interface, which is mapped in software by the in-kernel VGIC
support.
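
A minimal user-space sketch of setting both addresses could look as follows
(the base addresses are made up, and the id/type encoding follows what
kvm_vm_ioctl_set_device_address() below expects, using the macros introduced
by the KVM_SET_DEVICE_ADDRESS patch earlier in this series):

static int set_vgic_addresses(int vm_fd)
{
	struct kvm_device_address dist = {
		.id   = (KVM_ARM_DEVICE_VGIC_V2 << KVM_DEVICE_ID_SHIFT) |
			(KVM_VGIC_V2_ADDR_TYPE_DIST << KVM_DEVICE_TYPE_SHIFT),
		.addr = 0x2c001000,	/* assumed distributor base */
	};
	struct kvm_device_address cpu = {
		.id   = (KVM_ARM_DEVICE_VGIC_V2 << KVM_DEVICE_ID_SHIFT) |
			(KVM_VGIC_V2_ADDR_TYPE_CPU << KVM_DEVICE_TYPE_SHIFT),
		.addr = 0x2c002000,	/* assumed cpu interface base */
	};

	if (ioctl(vm_fd, KVM_SET_DEVICE_ADDRESS, &dist) < 0)
		return -1;
	return ioctl(vm_fd, KVM_SET_DEVICE_ADDRESS, &cpu);
}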

Signed-off-by: Christoffer Dall c.d...@virtualopensystems.com
---
 Documentation/virtual/kvm/api.txt |1 +
 arch/arm/include/asm/kvm_vgic.h   |9 +
 arch/arm/include/uapi/asm/kvm.h   |3 ++
 arch/arm/kvm/arm.c|   14 
 arch/arm/kvm/vgic.c   |   62 +
 5 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index 668956f..34d3ee9 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2218,6 +2218,7 @@ Errors:
   ENXIO:  Device not supported on current system
   EEXIST: Address already set
   E2BIG:  Address outside guest physical address space
+  EBUSY:  Address overlaps with other device range
 
 struct kvm_device_address {
__u64 id;
diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h
index fcfd530..270dcd2 100644
--- a/arch/arm/include/asm/kvm_vgic.h
+++ b/arch/arm/include/asm/kvm_vgic.h
@@ -22,6 +22,9 @@
 #include asm/hardware/gic.h
 
 struct vgic_dist {
+   /* Distributor and vcpu interface mapping in the guest */
+   phys_addr_t vgic_dist_base;
+   phys_addr_t vgic_cpu_base;
 };
 
 struct vgic_cpu {
@@ -33,6 +36,7 @@ struct kvm_run;
 struct kvm_exit_mmio;
 
 #ifdef CONFIG_KVM_ARM_VGIC
+int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr);
 bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
  struct kvm_exit_mmio *mmio);
 
@@ -42,6 +46,11 @@ static inline int kvm_vgic_hyp_init(void)
return 0;
 }
 
+static inline int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 
addr)
+{
+   return 0;
+}
+
 static inline int kvm_vgic_init(struct kvm *kvm)
 {
return 0;
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 09911a7..94e893b 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -78,6 +78,9 @@ struct kvm_regs {
 #define KVM_VGIC_V2_ADDR_TYPE_DIST 0
 #define KVM_VGIC_V2_ADDR_TYPE_CPU  1
 
+#define KVM_VGIC_V2_DIST_SIZE  0x1000
+#define KVM_VGIC_V2_CPU_SIZE   0x2000
+
 struct kvm_vcpu_init {
__u32 target;
__u32 features[7];
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 85c4cdf..4c2b057 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -858,7 +858,19 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct 
kvm_dirty_log *log)
 static int kvm_vm_ioctl_set_device_address(struct kvm *kvm,
   struct kvm_device_address *dev_addr)
 {
-   return -ENODEV;
+   unsigned long dev_id, type;
+
+   dev_id = (dev_addr->id & KVM_DEVICE_ID_MASK) >> KVM_DEVICE_ID_SHIFT;
+   type = (dev_addr->id & KVM_DEVICE_TYPE_MASK) >> KVM_DEVICE_TYPE_SHIFT;
+
+   switch (dev_id) {
+   case KVM_ARM_DEVICE_VGIC_V2:
+   if (!vgic_present)
+   return -ENXIO;
+   return kvm_vgic_set_addr(kvm, type, dev_addr->addr);
+   default:
+   return -ENODEV;
+   }
 }
 
 long kvm_arch_vm_ioctl(struct file *filp,
diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
index 1feee5a..cdb7671 100644
--- a/arch/arm/kvm/vgic.c
+++ b/arch/arm/kvm/vgic.c
@@ -22,6 +22,9 @@
 #include linux/io.h
 #include asm/kvm_emulate.h
 
+#define VGIC_ADDR_UNDEF(-1)
+#define IS_VGIC_ADDR_UNDEF(_x)  ((_x) == VGIC_ADDR_UNDEF)
+
 #define ACCESS_READ_VALUE  (1  0)
 #define ACCESS_READ_RAZ(0  0)
 #define ACCESS_READ_MASK(x)((x)  (1  0))
@@ -142,3 +145,62 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct 
kvm_run *run,
 {
return KVM_EXIT_MMIO;
 }
+
+static bool vgic_ioaddr_overlap(struct kvm *kvm)
+{
+   phys_addr_t dist = kvm->arch.vgic.vgic_dist_base;
+   phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base;
+
+   if (IS_VGIC_ADDR_UNDEF(dist) || IS_VGIC_ADDR_UNDEF(cpu))
+   return 0;
+   if ((dist <= cpu && dist + KVM_VGIC_V2_DIST_SIZE > cpu) ||
+   (cpu <= dist && cpu + KVM_VGIC_V2_CPU_SIZE > dist))
+   return -EBUSY;
+   return 0;
+}
+
+static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr,
+ phys_addr_t addr, phys_addr_t size)
+{
+   int ret;
+
+   if (!IS_VGIC_ADDR_UNDEF(*ioaddr))
+   return -EEXIST;
+   if (addr + size < addr)
+   return -EINVAL;
+
+   ret = vgic_ioaddr_overlap(kvm);
+   if (ret)
+   return ret;
+   *ioaddr = addr;
+   return ret;
+}
+
+int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 

[PATCH v5 04/14] KVM: ARM: Hypervisor initialization

2013-01-08 Thread Christoffer Dall
Sets up KVM code to handle all exceptions taken to Hyp mode.

When the kernel is booted in Hyp mode, calling an hvc instruction with r0
pointing to the new vectors changes the HVBAR to point to those vectors.
This allows subsystems (like KVM here) to execute code in Hyp mode with the
MMU disabled.

We initialize the other Hyp-mode registers and enable the MMU for Hyp mode
from the id-mapped hyp initialization code. Afterwards, the HVBAR is changed
to point to the KVM Hyp vectors used to catch guest faults and to switch to
Hyp mode to perform a world-switch into a KVM guest.

Also provide memory mapping code to map required code pages, data structures,
and I/O regions accessed in Hyp mode at the same virtual addresses as the host
kernel uses, but which conform to the architectural requirements for
translations in Hyp mode. This interface is added in arch/arm/kvm/arm_mmu.c
and comprises:
 - create_hyp_mappings(from, to);
 - create_hyp_io_mappings(from, to, phys_addr);
 - free_hyp_pmds();
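
A typical (hypothetical) use from the init path would be mapping the Hyp code
and a stack page so they remain reachable once the Hyp MMU is enabled;
__kvm_hyp_code_start/__kvm_hyp_code_end are the usual section symbols and
stack_page an assumed, caller-allocated per-cpu page:

	/* map the Hyp text so the world-switch code can run with Hyp MMU on */
	err = create_hyp_mappings(__kvm_hyp_code_start, __kvm_hyp_code_end);
	if (err)
		goto out_err;

	/* map one page of Hyp stack for this CPU */
	err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE);
	if (err) {
		free_hyp_pmds();	/* tear down everything mapped so far */
		goto out_err;
	}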

Reviewed-by: Marcelo Tosatti mtosa...@redhat.com
Signed-off-by: Marc Zyngier marc.zyng...@arm.com
Signed-off-by: Christoffer Dall c.d...@virtualopensystems.com
---
 arch/arm/include/asm/kvm_arm.h  |  124 ++
 arch/arm/include/asm/kvm_asm.h  |   20 ++
 arch/arm/include/asm/kvm_mmu.h  |   29 +++
 arch/arm/include/asm/pgtable-3level-hwdef.h |4 
 arch/arm/kvm/arm.c  |  177 +++
 arch/arm/kvm/init.S |   95 ++
 arch/arm/kvm/interrupts.S   |   37 
 arch/arm/kvm/mmu.c  |  248 +++
 8 files changed, 734 insertions(+)
 create mode 100644 arch/arm/include/asm/kvm_mmu.h

diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index c196a22..613afe2 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -21,4 +21,128 @@
 
 #include asm/types.h
 
+/* Hyp Configuration Register (HCR) bits */
+#define HCR_TGE(1  27)
+#define HCR_TVM(1  26)
+#define HCR_TTLB   (1  25)
+#define HCR_TPU(1  24)
+#define HCR_TPC(1  23)
+#define HCR_TSW(1  22)
+#define HCR_TAC(1  21)
+#define HCR_TIDCP  (1  20)
+#define HCR_TSC(1  19)
+#define HCR_TID3   (1  18)
+#define HCR_TID2   (1  17)
+#define HCR_TID1   (1  16)
+#define HCR_TID0   (1  15)
+#define HCR_TWE(1  14)
+#define HCR_TWI(1  13)
+#define HCR_DC (1  12)
+#define HCR_BSU(3  10)
+#define HCR_BSU_IS (1  10)
+#define HCR_FB (1  9)
+#define HCR_VA (1  8)
+#define HCR_VI (1  7)
+#define HCR_VF (1  6)
+#define HCR_AMO(1  5)
+#define HCR_IMO(1  4)
+#define HCR_FMO(1  3)
+#define HCR_PTW(1  2)
+#define HCR_SWIO   (1  1)
+#define HCR_VM 1
+
+/*
+ * The bits we set in HCR:
+ * TAC:Trap ACTLR
+ * TSC:Trap SMC
+ * TSW:Trap cache operations by set/way
+ * TWI:Trap WFI
+ * TIDCP:  Trap L2CTLR/L2ECTLR
+ * BSU_IS: Upgrade barriers to the inner shareable domain
+ * FB: Force broadcast of all maintainance operations
+ * AMO:Override CPSR.A and enable signaling with VA
+ * IMO:Override CPSR.I and enable signaling with VI
+ * FMO:Override CPSR.F and enable signaling with VF
+ * SWIO:   Turn set/way invalidates into set/way clean+invalidate
+ */
+#define HCR_GUEST_MASK (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \
+   HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \
+   HCR_SWIO | HCR_TIDCP)
+
+/* Hyp System Control Register (HSCTLR) bits */
+#define HSCTLR_TE  (1  30)
+#define HSCTLR_EE  (1  25)
+#define HSCTLR_FI  (1  21)
+#define HSCTLR_WXN (1  19)
+#define HSCTLR_I   (1  12)
+#define HSCTLR_C   (1  2)
+#define HSCTLR_A   (1  1)
+#define HSCTLR_M   1
+#define HSCTLR_MASK(HSCTLR_M | HSCTLR_A | HSCTLR_C | HSCTLR_I | \
+HSCTLR_WXN | HSCTLR_FI | HSCTLR_EE | HSCTLR_TE)
+
+/* TTBCR and HTCR Registers bits */
+#define TTBCR_EAE  (1  31)
+#define TTBCR_IMP  (1  30)
+#define TTBCR_SH1  (3  28)
+#define TTBCR_ORGN1(3  26)
+#define TTBCR_IRGN1(3  24)
+#define TTBCR_EPD1 (1  23)
+#define TTBCR_A1   (1  22)
+#define TTBCR_T1SZ (3  16)
+#define TTBCR_SH0  (3  12)
+#define TTBCR_ORGN0(3  10)
+#define TTBCR_IRGN0(3  8)
+#define TTBCR_EPD0 (1  7)
+#define TTBCR_T0SZ 3
+#define HTCR_MASK  (TTBCR_T0SZ | TTBCR_IRGN0 | TTBCR_ORGN0 | TTBCR_SH0)
+
+/* Hyp Debug Configuration Register bits */
+#define HDCR_TDRA  (1  11)
+#define HDCR_TDOSA (1  10)
+#define HDCR_TDA   (1  9)
+#define HDCR_TDE  

Re: FreeBSD-amd64 fails to start with SMP on quemu-kvm

2013-01-08 Thread Artur Samborski

On 08.01.2013 00:00, Marcelo Tosatti wrote:

On Mon, Jan 07, 2013 at 06:13:22PM +0100, Artur Samborski wrote:

Hello,

When i try to run FreeBSD-amd64 on more than 1 vcpu in quemu-kvm
(Fedora Core 17) eg. to run FreeBSD-9.0-RELEASE-amd64 with:

qemu-kvm -m 1024m -cpu host -smp 2 -cdrom
/storage/iso/FreeBSD-9.0-RELEASE-amd64-dvd1.iso

it freezes KVM with:

KVM internal error. Suberror: 1
emulation failure
RAX=80b0d4c0 RBX=0009f000 RCX=c080
RDX=
RSI=d238 RDI= RBP=
RSP=
R8 = R9 = R10=
R11=
R12= R13= R14=
R15=
RIP=0009f076 RFL=00010086 [--S--P-] CPL=0 II=0 A20=1 SMM=0 HLT=0
ES =   f300 DPL=3 DS16 [-WA]
CS =0008   00209900 DPL=0 CS64 [--A]
SS =9f00 0009f000  f300 DPL=3 DS16 [-WA]
DS =0018   00c09300 DPL=0 DS   [-WA]
FS =   f300 DPL=3 DS16 [-WA]
GS =   f300 DPL=3 DS16 [-WA]
LDT=   8200 DPL=0 LDT
TR =   8b00 DPL=0 TSS64-busy
GDT= 0009f080 0020
IDT=  
CR0=8011 CR2= CR3=0009c000 CR4=0030
DR0= DR1= DR2=
DR3=
DR6=0ff0 DR7=0400
EFER=0501
Code=00 00 00 80 0f 22 c0 ea 70 f0 09 00 08 00 48 b8 c0 d4 b0 80
ff ff ff ff ff e0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00 99 20 00 ff ff 00 00


Artur,

Can you check whether

https://patchwork-mail.kernel.org/patch/1942681/

fixes your problem



Hi, thanks for the reply.

Unfortunately, the patch does not help. Attempt to start FreeBSD amd64 
via quemu-kvm with -smp parameter greater than 1 fails in exactly the 
same way as before.


The patch was applied to the kernel from the 3.6.11-1.fc17.src.rpm package.

Do I need some additional patches?


--
Artur


Shadow page table gfn-pfn mappings

2013-01-08 Thread siddhesh phadke
Hello,

I have a few questions about the kvm code. I can see in the code that even
though EPT is used, some of the shadow page table code is still used, for
example __direct_map() in tdp_page_fault(). I guess it is used to keep track
of gfn->pfn mappings. Have I understood that correctly?

I was trying to find out if there is any way in the code to find the host
pfn when we have a gfn. I found some hash tables in the code, like
mmu_page_hash and active_mmu_pages in struct kvm_arch, but I could not find
a way to get the host pfn when a gfn is provided. Could anybody please give
some pointers on where to look in the code, or does no such mechanism exist?

Thanks,


Re: [PATCH v5 01/12] KVM: ARM: Introduce KVM_SET_DEVICE_ADDRESS ioctl

2013-01-08 Thread Scott Wood

On 01/08/2013 12:41:30 PM, Christoffer Dall wrote:
On ARM (and possibly other architectures) some bits are specific to  
the

model being emulated for the guest and user space needs a way to tell
the kernel about those bits.  An example is mmio device base  
addresses,

where KVM must know the base address for a given device to properly
emulate mmio accesses within a certain address range or directly map a
device with virtualiation extensions into the guest address space.

We try to make this API slightly more generic than for our specific  
use,

but so far only the VGIC uses this feature.

Signed-off-by: Christoffer Dall c.d...@virtualopensystems.com
---
 Documentation/virtual/kvm/api.txt |   37  
+

 arch/arm/include/uapi/asm/kvm.h   |   13 +
 arch/arm/kvm/arm.c|   23 ++-
 include/uapi/linux/kvm.h  |8 
 4 files changed, 80 insertions(+), 1 deletion(-)

diff --git a/Documentation/virtual/kvm/api.txt  
b/Documentation/virtual/kvm/api.txt

index 38066a7a..668956f 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2206,6 +2206,43 @@ This ioctl returns the guest registers that  
are supported for the

 KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.


+4.80 KVM_SET_DEVICE_ADDRESS
+
+Capability: KVM_CAP_SET_DEVICE_ADDRESS
+Architectures: arm
+Type: vm ioctl
+Parameters: struct kvm_device_address (in)
+Returns: 0 on success, -1 on error
+Errors:
+  ENODEV: The device id is unknown
+  ENXIO:  Device not supported on current system
+  EEXIST: Address already set
+  E2BIG:  Address outside guest physical address space
+
+struct kvm_device_address {
+   __u64 id;
+   __u64 addr;
+};


What about this is really specific to addresses?  Can't we set other  
device parameters this way?


Sort of like a device equivalent of PPC's one-reg interface.

-Scott


Re: [PATCH v5 01/12] KVM: ARM: Introduce KVM_SET_DEVICE_ADDRESS ioctl

2013-01-08 Thread Christoffer Dall
On Tue, Jan 8, 2013 at 5:36 PM, Scott Wood scottw...@freescale.com wrote:
 On 01/08/2013 12:41:30 PM, Christoffer Dall wrote:

 On ARM (and possibly other architectures) some bits are specific to the
 model being emulated for the guest and user space needs a way to tell
 the kernel about those bits.  An example is mmio device base addresses,
 where KVM must know the base address for a given device to properly
 emulate mmio accesses within a certain address range or directly map a
 device with virtualiation extensions into the guest address space.

 We try to make this API slightly more generic than for our specific use,
 but so far only the VGIC uses this feature.

 Signed-off-by: Christoffer Dall c.d...@virtualopensystems.com
 ---
  Documentation/virtual/kvm/api.txt |   37
 +
  arch/arm/include/uapi/asm/kvm.h   |   13 +
  arch/arm/kvm/arm.c|   23 ++-
  include/uapi/linux/kvm.h  |8 
  4 files changed, 80 insertions(+), 1 deletion(-)

 diff --git a/Documentation/virtual/kvm/api.txt
 b/Documentation/virtual/kvm/api.txt
 index 38066a7a..668956f 100644
 --- a/Documentation/virtual/kvm/api.txt
 +++ b/Documentation/virtual/kvm/api.txt
 @@ -2206,6 +2206,43 @@ This ioctl returns the guest registers that are
 supported for the
  KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.


 +4.80 KVM_SET_DEVICE_ADDRESS
 +
 +Capability: KVM_CAP_SET_DEVICE_ADDRESS
 +Architectures: arm
 +Type: vm ioctl
 +Parameters: struct kvm_device_address (in)
 +Returns: 0 on success, -1 on error
 +Errors:
 +  ENODEV: The device id is unknown
 +  ENXIO:  Device not supported on current system
 +  EEXIST: Address already set
 +  E2BIG:  Address outside guest physical address space
 +
 +struct kvm_device_address {
 +   __u64 id;
 +   __u64 addr;
 +};


 What about this is really specific to addresses?  Can't we set other device
 parameters this way?

 Sort of like a device equivalent of PPC's one-reg interface.

This has been discussed a number of times, and one way or the other there
is a need for userspace to tell KVM to present memory-mapped devices
at a given address. It was also considered to make this specific to
irqchip initialization, but irqchips are different and a lot of that
code is x86-specific, so that approach was discarded.

This *could* look something like this:

struct kvm_device_param {
u64 dev_id;
u64 param_id;
u64 value;
};

but that has less clear, or at least less specific, semantics.

-Christoffer


Re: [PATCH v5 01/12] KVM: ARM: Introduce KVM_SET_DEVICE_ADDRESS ioctl

2013-01-08 Thread Scott Wood

On 01/08/2013 05:17:05 PM, Christoffer Dall wrote:
On Tue, Jan 8, 2013 at 5:36 PM, Scott Wood scottw...@freescale.com  
wrote:

 On 01/08/2013 12:41:30 PM, Christoffer Dall wrote:
 +struct kvm_device_address {
 +   __u64 id;
 +   __u64 addr;
 +};


 What about this is really specific to addresses?  Can't we set  
other device

 parameters this way?

 Sort of like a device equivalent of PPC's one-reg interface.

This has been discussed a number of times,


Sorry, this patch was just pointed out to me today.  I googled the  
patch title but couldn't find this discussion.



and one or the other there
is a need for userspace to tell KVM to present memory-mapped devices
at a given address. It was also considered to make this specific to
irqchip initialization, but irqchips are different and a lot of that
code is x86-specific, so that approach was discarded.

This *could* look something like this:

struct kvm_device_param {
u64 dev_id;
u64 param_id;
u64 value;
};

but that has less clear, or at least less specific, semantics.


Why is it less clear?  You need to have device-specific documentation  
for what id means, so why not also an enumeration of params?  Or  
just keep it as is, and rename address to value.  Whether dev and  
param are combined is orthogonal from whether it's used for  
non-address things.


If you leave it as address, either we'll have it being used for  
non-address things regardless of the name (Not a typewriter!), or  
there'll end up being yet more unnecessary ioctls, or device-specific  
things will end up getting shoved into CPU interfaces such as one-reg.   
For example, on MPIC we need to be able to specify the version of the  
chip to emulate in addition to the address at which it lives.


Also, why is it documented as an arm interface?  Shouldn't it be a  
generic interface, with other architectures currently not implementing  
any IDs?  What in the kvm_arch_vm_ioctl() wrapper is arm-specific?


-Scott


Re: [PATCH v5 01/12] KVM: ARM: Introduce KVM_SET_DEVICE_ADDRESS ioctl

2013-01-08 Thread Christoffer Dall
On Tue, Jan 8, 2013 at 6:29 PM, Scott Wood scottw...@freescale.com wrote:
 On 01/08/2013 05:17:05 PM, Christoffer Dall wrote:

 On Tue, Jan 8, 2013 at 5:36 PM, Scott Wood scottw...@freescale.com
 wrote:
  On 01/08/2013 12:41:30 PM, Christoffer Dall wrote:
  +struct kvm_device_address {
  +   __u64 id;
  +   __u64 addr;
  +};
 
 
  What about this is really specific to addresses?  Can't we set other
  device
  parameters this way?
 
  Sort of like a device equivalent of PPC's one-reg interface.
 
 This has been discussed a number of times,


 Sorry, this patch was just pointed out to me today.  I googled the patch
 title but couldn't find this discussion.



I believe it was mainly discussed at the KVM Forum in person.

 and one or the other there
 is a need for userspace to tell KVM to present memory-mapped devices
 at a given address. It was also considered to make this specific to
 irqchip initialization, but irqchips are different and a lot of that
 code is x86-specific, so that approach was discarded.

 This *could* look something like this:

 struct kvm_device_param {
 u64 dev_id;
 u64 param_id;
 u64 value;
 };

 but that has less clear, or at least less specific, semantics.


 Why is it less clear?  You need to have device-specific documentation for
 what id means, so why not also an enumeration of params?  Or just keep
 it as is, and rename address to value.  Whether dev and param are
 combined is orthogonal from whether it's used for non-address things.

less clear in the sense that you have to look at more code to see what
it does. I'm not saying that it's too unclear, at all, I'm actually
fine with it, but to make my point, we can make an ioctl that's called
do_something() that takes a struct with val0, val1, val2, val3, ...


 If you leave it as address, either we'll have it being used for
 non-address things regardless of the name (Not a typewriter!), or there'll
 end up being yet more unnecessary ioctls, or device-specific things will end
 up getting shoved into CPU interfaces such as one-reg.  For example, on MPIC
 we need to be able to specify the version of the chip to emulate in addition
 to the address at which it lives.

 Also, why is it documented as an arm interface?  Shouldn't it be a generic
 interface, with other architectures currently not implementing any IDs?
 What in the kvm_arch_vm_ioctl() wrapper is arm-specific?

As I remember it, the argument for keeping this was that there
were other preferred methods for other archs to do this, and that ARM
was the only platform that had this explicit need, but maybe I'm
making this up.

I'll let Peter and Alex respond this as well, and if they're fine with
changing it to what I proposed above, then let's do that, and we can
make it a non arm-specific interface.

-Christoffer


Re: [PATCH v5 01/12] KVM: ARM: Introduce KVM_SET_DEVICE_ADDRESS ioctl

2013-01-08 Thread Scott Wood

On 01/08/2013 05:49:40 PM, Christoffer Dall wrote:
On Tue, Jan 8, 2013 at 6:29 PM, Scott Wood scottw...@freescale.com  
wrote:

 On 01/08/2013 05:17:05 PM, Christoffer Dall wrote:
 This *could* look something like this:

 struct kvm_device_param {
 u64 dev_id;
 u64 param_id;
 u64 value;
 };

 but that has less clear, or at least less specific, semantics.


 Why is it less clear?  You need to have device-specific  
documentation for
 what id means, so why not also an enumeration of params?  Or  
just keep
 it as is, and rename address to value.  Whether dev and  
param are
 combined is orthogonal from whether it's used for non-address  
things.


less clear in the sense that you have to look at more code to see what
it does. I'm not saying that it's too unclear, at all, I'm actually
fine with it, but to make my point, we can make an ioctl that's called
do_something() that takes a struct with val0, val1, val2, val3, ...


Such an IOCTL would add nothing other than trading the limited and  
cumbersome ioctl namespace for something structured a bit differently  
(which isn't such a bad thing...).  A set device attribute ioctl would  
constrain it to take this number and convey it to the enumerated  
device for the enumerated configuration purpose.  There's already room  
for device-specific semantics since you can have multiple address types.


Regarding the dev/param split, it looks like you're doing the split  
anyway -- might as well make them separate struct fields rather than an  
architecture-specific bitfield encoding.



 If you leave it as address, either we'll have it being used for
 non-address things regardless of the name (Not a typewriter!), or  
there'll
 end up being yet more unnecessary ioctls, or device-specific things  
will end
 up getting shoved into CPU interfaces such as one-reg.  For  
example, on MPIC
 we need to be able to specify the version of the chip to emulate in  
addition

 to the address at which it lives.

 Also, why is it documented as an arm interface?  Shouldn't it be  
a generic
 interface, with other architectures currently not implementing any  
IDs?

 What in the kvm_arch_vm_ioctl() wrapper is arm-specific?

As I remember the argument for keeping this the point was that there
were other preferred methods for other archs to do this, and that ARM
was the only platform that had this explicit need, but maybe I'm
making this up.


Well, at least PPC has this explicit need as well. :-)

Only the toplevel mechanism would be generic; it would be up to each  
device to decide which (if any) configuration parameters it wants to  
expose through it.


-Scott


Re: [Qemu-devel] [PATCH 10/12] virtio-net: multiqueue support

2013-01-08 Thread Jason Wang
On 01/08/2013 07:24 PM, Wanlong Gao wrote:
 On 01/08/2013 06:14 PM, Jason Wang wrote:
 On 01/08/2013 06:00 PM, Wanlong Gao wrote:
 On 01/08/2013 05:51 PM, Jason Wang wrote:
 On 01/08/2013 05:49 PM, Wanlong Gao wrote:
 On 01/08/2013 05:29 PM, Jason Wang wrote:
 On 01/08/2013 05:07 PM, Wanlong Gao wrote:
 On 12/28/2012 06:32 PM, Jason Wang wrote:
 +} else if (nc-peer-info-type !=  NET_CLIENT_OPTIONS_KIND_TAP) {
 +ret = -1;
 +} else {
 +ret = tap_detach(nc-peer);
 +}
 +
 +return ret;
 +}
 +
 +static void virtio_net_set_queues(VirtIONet *n)
 +{
 +int i;
 +
 +for (i = 0; i  n-max_queues; i++) {
 +if (i  n-curr_queues) {
 +assert(!peer_attach(n, i));
 +} else {
 +assert(!peer_detach(n, i));
 I got a assert here,
 qemu-system-x86_64: /work/git/qemu/hw/virtio-net.c:330: 
 virtio_net_set_queues: Assertion `!peer_detach(n, i)' failed.

 Any thoughts?

 Thanks,
 Wanlong Gao
 Thanks for the testing, which steps or cases did you met this assertion,
 migration, reboot or just changing the number of virtqueues?
 I use the 3.8-rc2 to test it again, I saw this tag has the multi-tap 
 support.

 I just can't start the QEMU use  -netdev 
 tap,id=hostnet0,queues=2,fd=%d,fd=%d -device 
 virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:ce:7b:29,bus=pci.0,addr=0x3

 I pre-opened two tap fds, did I missing something?
 Nothing missed :) It should work.

 Could you please try not use fd=X and let qemu to create the file
 descriptors by itself? Btw, how did you create the two tap fds?
 Can it create descriptors itself? I get 
 qemu-system-x86_64: -netdev tap,id=hostnet0,queues=2: Device 'tap' could 
 not be initialized
 You need to prepare an ifup script, which defaults to /etc/qemu-ifup (like the
 following). Or you may try to add a script=no after:

 #!/bin/sh

 switch=kvmbr0

 /sbin/ifconfig $1 0.0.0.0 up
 /usr/sbin/brctl addif $switch $1
 /usr/sbin/brctl stp $switch off

 This will let qemu create a tap fd itself and make it to be connected to
 a port of the bridge caled kvmbr0.
 I create the tap fd like this, and dup create the second fd, third fd, 
 right?
 The second and third fd should be created with TUNSETIFF with the same
 tap_name also. Btw, you need to specify a IFF_MULTI_QUEUE flag to tell
 the kernel you want to create a multiqueue tap device, otherwise the
 second and third calling of TUNSETIFF will fail.
 Thank you for teaching me, I'll try it tomorrow.

 Regards,
 Wanlong Gao

Thanks, the API of multiqueue should be documented in
Documentation/networking/tuntap.txt. It's in my TODO list.
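
For reference, a minimal sketch of the sequence described above (the tap name
and queue count are arbitrary, and IFF_MULTI_QUEUE needs a kernel with
multiqueue tap support):

#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/if.h>
#include <linux/if_tun.h>

/* Open 'queues' fds on the same multiqueue tap device; returns 0 or -1. */
static int open_mq_tap(const char *name, int fds[], int queues)
{
	struct ifreq ifr;
	int i;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, name, IFNAMSIZ - 1);
	ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_MULTI_QUEUE;

	for (i = 0; i < queues; i++) {
		fds[i] = open("/dev/net/tun", O_RDWR);
		if (fds[i] < 0 || ioctl(fds[i], TUNSETIFF, &ifr) < 0)
			return -1;	/* caller should close fds opened so far */
	}
	return 0;
}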

 Thanks
 int tap_fd = open("/dev/net/tun", O_RDWR);
 int vhost_fd = open("/dev/vhost-net", O_RDWR);
 char *tap_name = "tap";
 char cmd[2048];
 char brctl[256];
 char netup[256];
 struct ifreq ifr;
 if (tap_fd < 0) {
 printf("open tun device failed\n");
 return -1;
 }
 if (vhost_fd < 0) {
 printf("open vhost-net device failed\n");
 return -1;
 }
 memset(&ifr, 0, sizeof(ifr));
 memcpy(ifr.ifr_name, tap_name, sizeof(tap_name));
 ifr.ifr_flags = IFF_TAP | IFF_NO_PI;

 /*
  * setup tap net device
  */
 if (ioctl(tap_fd, TUNSETIFF, &ifr) < 0) {
 printf("setup tap net device failed\n");
 return -1;
 }

 sprintf(brctl, "brctl addif virbr0 %s", tap_name);
 sprintf(netup, "ifconfig %s up", tap_name);
 system(brctl);
 system(netup);

 Thanks,
 Wanlong Gao


 Thanks
 Thanks,
 Wanlong Gao

 +}
 +}
 +}
 +
 +static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, 
 int ctrl);
 +


[PATCH 1/2] VFIO-AER: Vfio-pci driver changes for supporting AER

2013-01-08 Thread Pandarathil, Vijaymohan R

- New ioctl which is used to pass the eventfd that is signaled when
  an error occurs in the vfio_pci_device

- Register pci_error_handler for the vfio_pci driver

- When the device encounters an error, the error handler registered by
  the vfio_pci driver gets invoked by the AER infrastructure

- In the error handler, signal the eventfd registered for the device.

- This results in the qemu eventfd handler getting invoked and
  appropriate action taken for the guest.
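
On the user-space side this pairs with something like the following minimal
sketch (the flag check and the plain-fd ioctl argument follow the definitions
added below; error handling is reduced to the bare minimum):

#include <unistd.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Register an error eventfd for an already-opened vfio-pci device fd. */
static int setup_err_eventfd(int device_fd)
{
	struct vfio_device_info info = { .argsz = sizeof(info) };
	int efd;

	if (ioctl(device_fd, VFIO_DEVICE_GET_INFO, &info))
		return -1;
	if (!(info.flags & VFIO_DEVICE_FLAGS_AER_NOTIFY))
		return -1;		/* kernel cannot signal AER errors */

	efd = eventfd(0, EFD_CLOEXEC);
	if (efd < 0)
		return -1;

	if (ioctl(device_fd, VFIO_DEVICE_SET_ERRFD, efd) < 0) {
		close(efd);
		return -1;
	}
	return efd;			/* poll/read this fd to observe errors */
}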

Signed-off-by: Vijay Mohan Pandarathil vijaymohan.pandarat...@hp.com
---
 drivers/vfio/pci/vfio_pci.c | 29 +
 drivers/vfio/pci/vfio_pci_private.h |  1 +
 drivers/vfio/vfio.c |  8 
 include/linux/vfio.h|  1 +
 include/uapi/linux/vfio.h   |  9 +
 5 files changed, 48 insertions(+)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 6c11994..4ae9526 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -207,6 +207,8 @@ static long vfio_pci_ioctl(void *device_data,
if (vdev->reset_works)
info.flags |= VFIO_DEVICE_FLAGS_RESET;
 
+   info.flags |= VFIO_DEVICE_FLAGS_AER_NOTIFY;
+
info.num_regions = VFIO_PCI_NUM_REGIONS;
info.num_irqs = VFIO_PCI_NUM_IRQS;
 
@@ -348,6 +350,19 @@ static long vfio_pci_ioctl(void *device_data,
 
return ret;
 
+   } else if (cmd == VFIO_DEVICE_SET_ERRFD) {
+   int32_t fd = (int32_t)arg;
+
+   if (fd < 0)
+   return -EINVAL;
+
+   vdev->err_trigger = eventfd_ctx_fdget(fd);
+
+   if (IS_ERR(vdev->err_trigger))
+   return PTR_ERR(vdev->err_trigger);
+
+   return 0;
+
} else if (cmd == VFIO_DEVICE_RESET)
return vdev->reset_works ?
pci_reset_function(vdev->pdev) : -EINVAL;
@@ -527,11 +542,25 @@ static void vfio_pci_remove(struct pci_dev *pdev)
kfree(vdev);
 }
 
+static pci_ers_result_t vfio_err_detected(struct pci_dev *pdev,
+   pci_channel_state_t state)
+{
+   struct vfio_pci_device *vdev = vfio_get_vdev(&pdev->dev);
+
+   eventfd_signal(vdev->err_trigger, 1);
+   return PCI_ERS_RESULT_CAN_RECOVER;
+}
+
+static const struct pci_error_handlers vfio_err_handlers = {
+   .error_detected = vfio_err_detected,
+};
+
 static struct pci_driver vfio_pci_driver = {
.name   = "vfio-pci",
.id_table   = NULL, /* only dynamic ids */
.probe  = vfio_pci_probe,
.remove = vfio_pci_remove,
+   .err_handler= vfio_err_handlers,
 };
 
 static void __exit vfio_pci_cleanup(void)
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 611827c..daee62f 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -55,6 +55,7 @@ struct vfio_pci_device {
bool    bardirty;
struct pci_saved_state  *pci_saved_state;
atomic_t    refcnt;
+   struct eventfd_ctx  *err_trigger;
 };
 
#define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX)
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 56097c6..5ed5a54 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -693,6 +693,14 @@ void *vfio_del_group_dev(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(vfio_del_group_dev);
 
+void *vfio_get_vdev(struct device *dev)
+{
+   struct vfio_device *device = dev_get_drvdata(dev);
+
+   return device->device_data;
+}
+EXPORT_SYMBOL_GPL(vfio_get_vdev);
+
 /**
  * VFIO base fd, /dev/vfio/vfio
  */
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index ab9e862..3c97b03 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -45,6 +45,7 @@ extern int vfio_add_group_dev(struct device *dev,
  void *device_data);
 
 extern void *vfio_del_group_dev(struct device *dev);
+extern void *vfio_get_vdev(struct device *dev);
 
 /**
  * struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 4758d1b..fa67213 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -147,6 +147,7 @@ struct vfio_device_info {
__u32   flags;
 #define VFIO_DEVICE_FLAGS_RESET  (1 << 0)   /* Device supports reset */
 #define VFIO_DEVICE_FLAGS_PCI  (1 << 1)   /* vfio-pci device */
+#define VFIO_DEVICE_FLAGS_AER_NOTIFY (1 << 2)  /* Supports aer notify */
__u32   num_regions;/* Max region index + 1 */
__u32   num_irqs;   /* Max IRQ index + 1 */
 };
@@ -288,6 +289,14 @@ struct vfio_irq_set {
  */
 #define VFIO_DEVICE_RESET  _IO(VFIO_TYPE, VFIO_BASE + 11)
 
+/**
+ * VFIO_DEVICE_SET_ERRFD - _IO(VFIO_TYPE, VFIO_BASE + 12)
+ *
+ * Pass the eventfd to the vfio-pci driver for signalling any device
+ * error notifications
+ */
+#define VFIO_DEVICE_SET_ERRFD   _IO(VFIO_TYPE, VFIO_BASE + 12)
+

[PATCH 2/2] QEMU-AER: Qemu changes to support AER for VFIO-PCI devices

2013-01-08 Thread Pandarathil, Vijaymohan R
- Create eventfd per vfio device assigned to a guest and register an
  event handler

- This fd is passed to the vfio_pci driver through a new ioctl

- When the device encounters an error, the eventfd is signaled
  and the qemu eventfd handler gets invoked.

- In the handler, decide what action to take. The current action is
  to terminate the guest.

Signed-off-by: Vijay Mohan Pandarathil vijaymohan.pandarat...@hp.com
---
 hw/vfio_pci.c  | 56 ++
 linux-headers/linux/vfio.h |  9 
 2 files changed, 65 insertions(+)

diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c
index 28c8303..9c3c28b 100644
--- a/hw/vfio_pci.c
+++ b/hw/vfio_pci.c
@@ -38,6 +38,7 @@
 #include qemu/error-report.h
 #include qemu/queue.h
 #include qemu/range.h
+#include sysemu/sysemu.h
 
 /* #define DEBUG_VFIO */
 #ifdef DEBUG_VFIO
@@ -130,6 +131,8 @@ typedef struct VFIODevice {
 QLIST_ENTRY(VFIODevice) next;
 struct VFIOGroup *group;
 bool reset_works;
+EventNotifier errfd;
+__u32 dev_info_flags;
 } VFIODevice;
 
 typedef struct VFIOGroup {
@@ -1805,6 +1808,8 @@ static int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vdev)
DPRINTF("Device %s flags: %u, regions: %u, irgs: %u\n", name,
 dev_info.flags, dev_info.num_regions, dev_info.num_irqs);
 
+vdev->dev_info_flags = dev_info.flags;
+
 if (!(dev_info.flags & VFIO_DEVICE_FLAGS_PCI)) {
 error_report("vfio: Um, this isn't a PCI device\n");
 goto error;
@@ -1900,6 +1905,55 @@ static void vfio_put_device(VFIODevice *vdev)
 }
 }
 
+static void vfio_errfd_handler(void *opaque)
+{
+VFIODevice *vdev = opaque;
+
+if (!event_notifier_test_and_clear(&vdev->errfd)) {
+return;
+}
+
+/*
+ * TBD. Retrieve the error details and decide what action
+ * needs to be taken. One of the actions could be to pass
+ * the error to the guest and have the guest driver recover
+ * the error. This requires that PCIe capabilities be
+ * exposed to the guest. At present, we just terminate the
+ * guest to contain the error.
+ */
+error_report("%s(%04x:%02x:%02x.%x) "
+"Unrecoverable error detected... Terminating guest\n",
+__func__, vdev->host.domain, vdev->host.bus, vdev->host.slot,
+vdev->host.function);
+
+qemu_system_shutdown_request();
+return;
+}
+
+static void vfio_register_errfd(VFIODevice *vdev)
+{
+int32_t pfd;
+int ret;
+
+if (!(vdev->dev_info_flags & VFIO_DEVICE_FLAGS_AER_NOTIFY)) {
+error_report("vfio: Warning: Error notification not supported for the device\n");
+return;
+}
+if (event_notifier_init(&vdev->errfd, 0)) {
+error_report("vfio: Warning: Unable to init event notifier for error detection\n");
+return;
+}
+pfd = event_notifier_get_fd(&vdev->errfd);
+qemu_set_fd_handler(pfd, vfio_errfd_handler, NULL, vdev);
+
+ret = ioctl(vdev->fd, VFIO_DEVICE_SET_ERRFD, pfd);
+if (ret) {
+error_report("vfio: Warning: Failed to setup error fd: %d\n", ret);
+qemu_set_fd_handler(pfd, NULL, NULL, vdev);
+event_notifier_cleanup(&vdev->errfd);
+}
+return;
+}
 static int vfio_initfn(PCIDevice *pdev)
 {
 VFIODevice *pvdev, *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
@@ -2010,6 +2064,8 @@ static int vfio_initfn(PCIDevice *pdev)
 }
 }
 
+vfio_register_errfd(vdev);
+
 return 0;
 
 out_teardown:
diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
index 4758d1b..0ca4eeb 100644
--- a/linux-headers/linux/vfio.h
+++ b/linux-headers/linux/vfio.h
@@ -147,6 +147,7 @@ struct vfio_device_info {
__u32   flags;
 #define VFIO_DEVICE_FLAGS_RESET  (1 << 0)   /* Device supports reset */
 #define VFIO_DEVICE_FLAGS_PCI  (1 << 1)   /* vfio-pci device */
+#define VFIO_DEVICE_FLAGS_AER_NOTIFY (1 << 2)   /* Supports aer notification */
__u32   num_regions;/* Max region index + 1 */
__u32   num_irqs;   /* Max IRQ index + 1 */
 };
@@ -288,6 +289,14 @@ struct vfio_irq_set {
  */
 #define VFIO_DEVICE_RESET  _IO(VFIO_TYPE, VFIO_BASE + 11)
 
+/**
+ * VFIO_DEVICE_SET_ERRFD - _IO(VFIO_TYPE, VFIO_BASE + 12)
+ *
+ * Pass the eventfd to the vfio-pci driver for signalling any device
+ * error notifications
+ */
+#define VFIO_DEVICE_SET_ERRFD   _IO(VFIO_TYPE, VFIO_BASE + 12)
+
 /*
  * The VFIO-PCI bus driver makes use of the following fixed region and
  * IRQ index mapping.  Unimplemented regions return a size of zero.
-- 
1.7.11.3



[PATCH 0/2] AER-KVM: Error containment of VFIO devices assigned to KVM guests

2013-01-08 Thread Pandarathil, Vijaymohan R
Add support for error containment when a VFIO device assigned to a KVM
guest encounters an error. This applies to PCIe devices/drivers that support
AER functionality. When the host OS is notified of an error in a device,
either through the firmware-first approach or through an interrupt handled
by the AER root port driver, the error handler registered by the vfio-pci
driver gets invoked. The qemu process is then signaled through an eventfd
that it registered for that VFIO device. In the eventfd handler, qemu
decides what action to take. In this implementation, the guest is brought
down to contain the error.
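
For reference, the userspace side of this flow reduces to the sketch below.
It is illustrative only: it assumes a vfio.h that carries the
VFIO_DEVICE_SET_ERRFD ioctl added by this series, and a device fd that has
already been obtained through the usual VFIO container/group setup (omitted
here, as is most error handling).

#include <poll.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Register an eventfd for device errors and block until it fires. */
static void wait_for_device_error(int device_fd)
{
    uint64_t count;
    struct pollfd pfd;
    int efd = eventfd(0, EFD_CLOEXEC);

    if (efd < 0) {
        perror("eventfd");
        return;
    }

    /* Hand the eventfd to vfio-pci; the AER handler signals it on error. */
    if (ioctl(device_fd, VFIO_DEVICE_SET_ERRFD, efd) < 0) {
        perror("VFIO_DEVICE_SET_ERRFD");
        close(efd);
        return;
    }

    pfd.fd = efd;
    pfd.events = POLLIN;

    if (poll(&pfd, 1, -1) == 1 && (pfd.revents & POLLIN) &&
        read(efd, &count, sizeof(count)) == sizeof(count)) {
        /* A real VMM would now stop or reset the guest. */
        fprintf(stderr, "device reported an unrecoverable error; "
                        "containing the guest\n");
    }
    close(efd);
}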

---
Vijay Mohan Pandarathil(2):

[PATCH 1/2] VFIO-AER: Vfio-pci driver changes for supporting AER
[PATCH 2/2] QEMU-AER: Qemu changes to support AER for VFIO-PCI devices

Kernel files changed

 drivers/vfio/pci/vfio_pci.c | 29 +
 drivers/vfio/pci/vfio_pci_private.h |  1 +
 drivers/vfio/vfio.c |  8 
 include/linux/vfio.h|  1 +
 include/uapi/linux/vfio.h   |  9 +
 5 files changed, 48 insertions(+)

Qemu files changed

 hw/vfio_pci.c  | 56 ++
 linux-headers/linux/vfio.h |  9 
 2 files changed, 65 insertions(+)


[PATCH] virtio-spec: fix two typos

2013-01-08 Thread akong
From: Amos Kong ak...@redhat.com

VIRTIO_NET_F_VTRL_VQ -> VIRTIO_NET_F_CTRL_VQ
VIRTIO_NET_CTRL_MQ is defined as 4 in the kernel code
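
For context, the control-virtqueue class numbers below show why the value
must be 4; these are quoted from memory of include/uapi/linux/virtio_net.h
and should be verified against the actual header before relying on them.

#define VIRTIO_NET_CTRL_RX        0   /* rx mode, e.g. promiscuous */
#define VIRTIO_NET_CTRL_MAC       1   /* MAC address filtering */
#define VIRTIO_NET_CTRL_VLAN      2   /* VLAN filtering */
#define VIRTIO_NET_CTRL_ANNOUNCE  3   /* driver announcement */
#define VIRTIO_NET_CTRL_MQ        4   /* multiqueue steering */
#define  VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET  0   /* command under class 4 */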

Signed-off-by: Amos Kong ak...@redhat.com
---
 virtio-spec.lyx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/virtio-spec.lyx b/virtio-spec.lyx
index 78ec5d0..1ba9992 100644
--- a/virtio-spec.lyx
+++ b/virtio-spec.lyx
@@ -5156,7 +5156,7 @@ Control Virtqueue
 \end_layout
 
 \begin_layout Standard
-The driver uses the control virtqueue (if VIRTIO_NET_F_VTRL_VQ is negotiated)
+The driver uses the control virtqueue (if VIRTIO_NET_F_CTRL_VQ is negotiated)
  to send commands to manipulate various features of the device which would
  not easily map into the configuration space.
 \end_layout
@@ -5501,7 +5501,7 @@ struct virtio_net_ctrl_mq {
 
 \change_inserted 1986246365 1353594263
 
-#define VIRTIO_NET_CTRL_MQ    1
+#define VIRTIO_NET_CTRL_MQ    4
 \end_layout
 
 \begin_layout Plain Layout
-- 
1.7.11.7
