Re: [PATCH 3/3] KVM: x86: fix ready_for_interrupt reporting in split IRQ chip case

2015-11-13 Thread Steve Rutherford
On Fri, Nov 13, 2015 at 12:52 AM, Paolo Bonzini  wrote:
>
>
> On 12/11/2015 20:07, Matt Gingell wrote:
>> This patch adds a call to kvm_arch_interrupt_allowed to ensure ready for
>> interrupt is reported to user space correctly. This addresses a problem
>> observed in QEMU when kvm->ready_for_interrupt is set but the x86
>> interrupt flag is clear.
>>
>> Additionally, test that the APIC is ready to accept an interrupt before
>> reporting we are ready for injection.
>>
>> Reviewed-by: Andy Honig 
>> Signed-off-by: Matt Gingell 
>
> I think you need to add the same call to dm_request_for_irq_injection, like
>
> -   return (irqchip_split(vcpu->kvm)
> -   ? kvm_apic_accept_pic_intr(vcpu)
> -   : kvm_arch_interrupt_allowed(vcpu));
> +   if (!kvm_arch_interrupt_allowed(vcpu))
> +   return false;
> +
> +   return !lapic_in_kernel(vcpu) || kvm_apic_accept_pic_intr(vcpu);
>
> At this point, just to err on the safe side, we probably should test
> kvm_event_needs_reinjection(vcpu) as well in dm_request_for_irq_injection.
This is definitely necessary. Without it, it's possible to bounce back
and forth between userspace and the kernel.

(Actually ran into this in testing yesterday evening. If you ever want
to stress test legacy interrupt handling devices, try booting Plan9)
>
> We can then make a new function kvm_vcpu_ready_for_interrupt_injection
> with the sequence of tests (kvm_cpu_has_interrupt,
> kvm_arch_interrupt_allowed, kvm_event_needs_reinjection, possibly
> kvm_apic_accept_pic_intr) so that:
>
> - dm_request_for_irq_injection becomes simply
>
> return (vcpu->run->request_interrupt_window &&
> likely(!pic_in_kernel(vcpu->kvm)));
>
> - the caller of dm_request_for_irq_injection does
>
> if (dm_request_for_irq_injection(vcpu) &&
> kvm_vcpu_ready_for_interrupt_injection(vcpu))
>
> - post_kvm_run_save's assignment becomes
>
> kvm_run->ready_for_interrupt_injection =
> !pic_in_kernel(vcpu->kvm) ||
> kvm_vcpu_ready_for_interrupt_injection(vcpu);
>
> The code would make a lot of sense then; I hope it will work too. :)
>
> Paolo "ceci n'est pas une patch"
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Add baseline PIC tests.

2015-09-02 Thread Steve Rutherford
Tests basic interrupt functionality of the PIC. Relies on the QEMU
test device to raise and lower irq lines leading into the PIC.

Signed-off-by: Steve Rutherford <srutherf...@google.com>
---
 config/config-x86-common.mak |   3 +
 config/config-x86_64.mak |   2 +-
 lib/x86/apic.c   |  20 ++-
 lib/x86/apic.h   |   3 +
 lib/x86/pic.c|  80 +++
 lib/x86/pic.h|  20 +++
 x86/ioapic.c |  11 --
 x86/pic.c| 330 +++
 8 files changed, 452 insertions(+), 17 deletions(-)
 create mode 100644 lib/x86/pic.c
 create mode 100644 lib/x86/pic.h
 create mode 100644 x86/pic.c

diff --git a/config/config-x86-common.mak b/config/config-x86-common.mak
index c2f9908..a7aa17a 100644
--- a/config/config-x86-common.mak
+++ b/config/config-x86-common.mak
@@ -7,6 +7,7 @@ cflatobjs += lib/x86/smp.o
 cflatobjs += lib/x86/vm.o
 cflatobjs += lib/x86/fwcfg.o
 cflatobjs += lib/x86/apic.o
+cflatobjs += lib/x86/pic.o
 cflatobjs += lib/x86/atomic.o
 cflatobjs += lib/x86/desc.o
 cflatobjs += lib/x86/isr.o
@@ -70,6 +71,8 @@ $(TEST_DIR)/apic.elf: $(cstart.o) $(TEST_DIR)/apic.o
 
 $(TEST_DIR)/ioapic.elf: $(cstart.o) $(TEST_DIR)/ioapic.o
 
+$(TEST_DIR)/pic.elf: $(cstart.o) $(TEST_DIR)/pic.o
+
 $(TEST_DIR)/tscdeadline_latency.elf: $(cstart.o) 
$(TEST_DIR)/tscdeadline_latency.o
 
 $(TEST_DIR)/init.elf: $(cstart.o) $(TEST_DIR)/init.o
diff --git a/config/config-x86_64.mak b/config/config-x86_64.mak
index 7d4eb34..0f2c47a 100644
--- a/config/config-x86_64.mak
+++ b/config/config-x86_64.mak
@@ -7,7 +7,7 @@ tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \
  $(TEST_DIR)/emulator.flat $(TEST_DIR)/idt_test.flat \
  $(TEST_DIR)/xsave.flat $(TEST_DIR)/rmap_chain.flat \
  $(TEST_DIR)/pcid.flat $(TEST_DIR)/debug.flat \
- $(TEST_DIR)/ioapic.flat
+ $(TEST_DIR)/ioapic.flat $(TEST_DIR)/pic.flat
 tests += $(TEST_DIR)/svm.flat
 tests += $(TEST_DIR)/vmx.flat
 tests += $(TEST_DIR)/tscdeadline_latency.flat
diff --git a/lib/x86/apic.c b/lib/x86/apic.c
index 80b96d8..2e8e466 100644
--- a/lib/x86/apic.c
+++ b/lib/x86/apic.c
@@ -1,6 +1,8 @@
 #include "libcflat.h"
 #include "apic.h"
+#include "io.h"
 #include "msr.h"
+#include "pic.h"
 
 static void *g_apic = (void *)0xfee00000;
 static void *g_ioapic = (void *)0xfec00000;
@@ -12,9 +14,15 @@ struct apic_ops {
 u32 (*id)(void);
 };
 
-static void outb(unsigned char data, unsigned short port)
+void set_irq_line(unsigned line, int val)
 {
-asm volatile ("out %0, %1" : : "a"(data), "d"(port));
+outb(val, 0x2000 + line);
+}
+
+void toggle_irq_line(unsigned line)
+{
+set_irq_line(line, 1);
+set_irq_line(line, 0);
 }
 
 void eoi(void)
@@ -164,8 +172,10 @@ void enable_apic(void)
 xapic_write(0xf0, 0x1ff); /* spurious vector register */
 }
 
-void mask_pic_interrupts(void)
+void unmask_lvt0(void)
 {
-outb(0xff, 0x21);
-outb(0xff, 0xa1);
+int lvt0 = apic_read(APIC_LVT0);
+lvt0 &= ~APIC_LVT_MASKED;
+lvt0 = SET_APIC_DELIVERY_MODE(lvt0, APIC_MODE_EXTINT);
+apic_write(APIC_LVT0, lvt0);
 }
diff --git a/lib/x86/apic.h b/lib/x86/apic.h
index 216b98d..a5b19c0 100644
--- a/lib/x86/apic.h
+++ b/lib/x86/apic.h
@@ -20,6 +20,8 @@ typedef struct {
 
 void mask_pic_interrupts(void);
 
+void set_irq_line(unsigned line, int val);
+void toggle_irq_line(unsigned line);
 void eoi(void);
 
 void ioapic_write_redir(unsigned line, ioapic_redir_entry_t e);
@@ -37,4 +39,5 @@ uint32_t apic_id(void);
 
 int enable_x2apic(void);
 
+void unmask_lvt0(void);
 #endif
diff --git a/lib/x86/pic.c b/lib/x86/pic.c
new file mode 100644
index 0000000..e88d0b1
--- /dev/null
+++ b/lib/x86/pic.c
@@ -0,0 +1,80 @@
+#include "libcflat.h"
+#include "apic.h"
+#include "apic-defs.h"
+#include "io.h"
+#include "isr.h"
+#include "processor.h"
+
+#define PIC_MASTER 0x20
+#define PIC_SLAVE 0xA0
+#define PIC_MASTER_COMMAND PIC_MASTER
+#define PIC_MASTER_DATA (PIC_MASTER+1)
+#define PIC_SLAVE_COMMAND PIC_SLAVE
+#define PIC_SLAVE_DATA (PIC_SLAVE+1)
+
+#define PIC_IRQ_MASK 0x7
+#define PIC_NONSPECIFIC_EOI 0x20
+#define PIC_EOI 0x60
+
+#define PIC_MASTER_CASCADE_LINE 0x2
+
+unsigned char pic_read_data(int slave)
+{
+   unsigned char port = (slave) ? PIC_SLAVE_DATA : PIC_MASTER_DATA;
+
+   return inb(port);
+}
+
+void pic_write_data(unsigned char value, int slave)
+{
+   unsigned char port = (slave) ? PIC_SLAVE_DATA : PIC_MASTER_DATA;
+
+   outb(value, port);
+}
+
+unsigned char pic_read_command(int slave)
+{
+   unsigned char port = (slave) ? PIC_SLAVE_COMMAND : PIC_MASTER_COMMAND;
+
+   return inb(port);
+}
+
+void pic_write_command(unsigned char value, int slave)
+{
+   unsigned char port = (slave) ? PIC_SLAVE_COMMAND : PIC_MASTER_COMMAND;
+
+   outb

Re: [PATCH 1/2] KVM: x86: set TMR when the interrupt is accepted

2015-09-02 Thread Steve Rutherford
On Thu, Aug 13, 2015 at 09:31:48AM +0200, Paolo Bonzini wrote:
Pinging this thread.

Should I put together a patch to make split irqchip work properly with the old 
TMR behavior?

> 
> 
> On 13/08/2015 08:35, Zhang, Yang Z wrote:
> >> You may be right. It is safe if no future hardware plans to use
> >> it. Let me check with our hardware team to see whether it will be
> >> used or not in future.
> > 
> > After checking with Jun, there is no guarantee that the guest running
> > on another CPU will operate properly if the hypervisor modifies the vTMR
> > from another CPU. So the hypervisor should not do it.
> 
> I guess I can cause a vmexit on level-triggered interrupts, it's not a
> big deal, but no weasel words, please.
> 
> What's going to break, and where is it documented?
> 
> Paolo
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v7 1/4] KVM: x86: Split the APIC from the rest of IRQCHIP.

2015-07-30 Thread Steve Rutherford
On Thu, Jul 30, 2015 at 11:38:20AM +0200, Paolo Bonzini wrote:
 
 
 On 30/07/2015 10:37, Steve Rutherford wrote:
  This looks a bit non-sensical, but is overprepared for the introduction
  of IOAPIC hotplug, which is two patches down the line. Changing it is fine,
  you'll just need to merge the very same change back.
 
 By IOAPIC hotplug you mean changing the number of reserved routes?  Is
 it necessary?  You could just reserve a bunch of routes depending on the
 maximum number of IOAPICs.
Hmm. Yeah, I think that might be cleaner. Thinking about it, I'm a bit nervous
about the idea of the number of reserved routes shrinking. We would have needed
to trigger an IOAPIC scan if the number of reserved routes changed.

Jan might have an opinion here.

 
 And especially, is it documented? :)  The docs say "Fails [...] if the
 irqchip is already in the kernel (i.e. KVM_CREATE_IRQCHIP has already
 been called)".
The documentation was out of date D: 

 
 As before, no need to resend patches for now.  Let's finish discussing
 all pending points, then I'll push what I have to kvm.git and you can
 test it with your VMM.  There should be time between this week and the next.
 
 Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v7 1/4] KVM: x86: Split the APIC from the rest of IRQCHIP.

2015-07-30 Thread Steve Rutherford
First patch in a series which enables the relocation of the
PIC/IOAPIC to userspace.

Adds capability KVM_CAP_SPLIT_IRQCHIP;

KVM_CAP_SPLIT_IRQCHIP enables the construction of LAPICs without the
rest of the irqchip.

Compile tested for x86.

Signed-off-by: Steve Rutherford <srutherf...@google.com>
Suggested-by: Andrew Honig <aho...@google.com>
---
 Documentation/virtual/kvm/api.txt | 15 +++
 arch/x86/include/asm/kvm_host.h   |  2 ++
 arch/x86/kvm/i8254.c  |  4 +++-
 arch/x86/kvm/ioapic.h |  8 
 arch/x86/kvm/irq.h| 11 ++-
 arch/x86/kvm/irq_comm.c   |  9 -
 arch/x86/kvm/lapic.c  |  6 --
 arch/x86/kvm/vmx.c|  4 ++--
 arch/x86/kvm/x86.c| 25 +++--
 include/linux/kvm_host.h  |  1 +
 include/uapi/linux/kvm.h  |  1 +
 11 files changed, 77 insertions(+), 9 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index a4ebcb7..b655024 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3620,6 +3620,21 @@ struct {
 
 KVM handlers should exit to userspace with rc = -EREMOTE.
 
+7.5 KVM_CAP_SPLIT_IRQCHIP
+
+Architectures: x86
+Parameters: None
+Returns: 0 on success, -1 on error
+
+Create a local apic for each processor in the kernel. With this capability
+enabled, the userspace VMM is expected to emulate the IOAPIC and PIC.
+
+This supersedes KVM_CREATE_IRQCHIP, creating only local APICs, but no in kernel
+IOAPIC or PIC. This also enables in kernel routing of interrupt requests.
+
+Fails if VCPU has already been created, or if the irqchip is already in the
+kernel (i.e. KVM_CREATE_IRQCHIP has already been called).
+
 
 8. Other capabilities.
 --
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d3e7a53..b4fdf0c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -670,6 +670,8 @@ struct kvm_arch {
bool boot_vcpu_runs_old_kvmclock;
 
u64 disabled_quirks;
+
+   bool irqchip_split;
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index f588eb7..08116ff 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -35,6 +35,7 @@
 #include <linux/kvm_host.h>
 #include <linux/slab.h>
 
+#include "ioapic.h"
 #include "irq.h"
 #include "i8254.h"
 #include "x86.h"
@@ -333,7 +334,8 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int 
is_period)
struct kvm_kpit_state *ps = kvm-arch.vpit-pit_state;
s64 interval;
 
-   if (ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)
+   if (!ioapic_in_kernel(kvm) ||
+   ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)
return;
 
interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index bf36d66..a8842c0 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -97,6 +97,14 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm 
*kvm)
return kvm-arch.vioapic;
 }
 
+static inline int ioapic_in_kernel(struct kvm *kvm)
+{
+   int ret;
+
+   ret = (ioapic_irqchip(kvm) != NULL);
+   return ret;
+}
+
 void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
 bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
int short_hand, unsigned int dest, int dest_mode);
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 3d782a2..72af5a9 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -83,13 +83,22 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
return kvm-arch.vpic;
 }
 
+static inline int irqchip_split(struct kvm *kvm)
+{
+   return kvm-arch.irqchip_split;
+}
+
 static inline int irqchip_in_kernel(struct kvm *kvm)
 {
+   bool ret;
struct kvm_pic *vpic = pic_irqchip(kvm);
 
/* Read vpic before kvm-irq_routing.  */
smp_rmb();
-   return vpic != NULL;
+   ret = (vpic != NULL);
+   ret |= irqchip_split(kvm);
+
+   return ret;
 }
 
 void kvm_pic_reset(struct kvm_kpic_state *s);
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 9efff9e..67f6b62 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -208,7 +208,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int 
irq_source_id)
goto unlock;
}
clear_bit(irq_source_id, kvm-arch.irq_sources_bitmap);
-   if (!irqchip_in_kernel(kvm))
+   if (!ioapic_in_kernel(kvm))
goto unlock;
 
kvm_ioapic_clear_all(kvm-arch.vioapic, irq_source_id);
@@ -328,3 +328,10 @@ int kvm_setup_default_irq_routing(struct kvm *kvm)
return kvm_set_irq_routing(kvm, default_routing,
   ARRAY_SIZE(default_routing), 0);
 }
+
+static const struct kvm_irq_routing_entry empty_routing[] = {};
+
+int kvm_setup_empty_irq_routing(struct kvm

[PATCH v7 3/4] KVM: x86: Add EOI exit bitmap inference

2015-07-30 Thread Steve Rutherford
In order to support a userspace IOAPIC interacting with an in kernel
APIC, the EOI exit bitmaps need to be configurable.

If the IOAPIC is in userspace (i.e. the irqchip has been split), the
EOI exit bitmaps will be set whenever the GSI Routes are configured.
In particular, the low MSI routes are reservable for userspace
IOAPICs. For these MSI routes, the EOI Exit bit corresponding to the
destination vector of the route will be set for the destination VCPU.

The intention is for the userspace IOAPICs to use the reservable MSI
routes to inject interrupts into the guest.

This is a slight abuse of the notion of an MSI Route, given that MSIs
classically bypass the IOAPIC. It might be worthwhile to add an
additional route type to improve clarity.

Compile tested for Intel x86.

Signed-off-by: Steve Rutherford <srutherf...@google.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/ioapic.h   |  2 ++
 arch/x86/kvm/irq_comm.c | 42 +
 arch/x86/kvm/lapic.c|  3 +--
 arch/x86/kvm/x86.c  | 11 ++-
 include/linux/kvm_host.h| 20 
 virt/kvm/irqchip.c  | 12 ++--
 7 files changed, 78 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b27f54d..ed896fe 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -674,6 +674,7 @@ struct kvm_arch {
u64 disabled_quirks;
 
bool irqchip_split;
+   u8 nr_reserved_ioapic_pins;
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index a8842c0..30023ae 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -9,6 +9,7 @@ struct kvm;
 struct kvm_vcpu;
 
 #define IOAPIC_NUM_PINS  KVM_IOAPIC_NUM_PINS
+#define MAX_NR_RESERVED_IOAPIC_PINS 48
 #define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */
 #define IOAPIC_EDGE_TRIG  0
 #define IOAPIC_LEVEL_TRIG 1
@@ -122,4 +123,5 @@ int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state 
*state);
 int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
 void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
 
+void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
 #endif
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 67f6b62..da4827f 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -335,3 +335,45 @@ int kvm_setup_empty_irq_routing(struct kvm *kvm)
 {
return kvm_set_irq_routing(kvm, empty_routing, 0, 0);
 }
+
+void kvm_arch_irq_routing_update(struct kvm *kvm)
+{
+   if (ioapic_in_kernel(kvm) || !irqchip_in_kernel(kvm))
+   return;
+   kvm_make_scan_ioapic_request(kvm);
+}
+
+void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
+{
+   struct kvm *kvm = vcpu->kvm;
+   struct kvm_kernel_irq_routing_entry *entry;
+   struct kvm_irq_routing_table *table;
+   u32 i, nr_ioapic_pins;
+   int idx;
+
+   /* kvm->irq_routing must be read after clearing
+* KVM_SCAN_IOAPIC. */
+   smp_mb();
+   idx = srcu_read_lock(&kvm->irq_srcu);
+   table = kvm->irq_routing;
+   nr_ioapic_pins = min_t(u32, table->nr_rt_entries,
+  kvm->arch.nr_reserved_ioapic_pins);
+   for (i = 0; i < nr_ioapic_pins; ++i) {
+   hlist_for_each_entry(entry, &table->map[i], link) {
+   u32 dest_id, dest_mode;
+
+   if (entry->type != KVM_IRQ_ROUTING_MSI)
+   continue;
+   dest_id = (entry->msi.address_lo >> 12) & 0xff;
+   dest_mode = (entry->msi.address_lo >> 2) & 0x1;
+   if (kvm_apic_match_dest(vcpu, NULL, 0, dest_id,
+   dest_mode)) {
+   u32 vector = entry->msi.data & 0xff;
+
+   __set_bit(vector,
+ (unsigned long *) eoi_exit_bitmap);
+   }
+   }
+   }
+   srcu_read_unlock(&kvm->irq_srcu, idx);
+}
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 9e69296a..e7566f1 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -209,8 +209,7 @@ out:
if (old)
kfree_rcu(old, rcu);
 
-   if (!irqchip_split(kvm))
-   kvm_vcpu_request_scan_ioapic(kvm);
+   kvm_make_scan_ioapic_request(kvm);
 }
 
 static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4159f29..f9756e7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3572,6 +3572,11 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
r = -EEXIST;
if (irqchip_in_kernel(kvm))
goto split_irqchip_unlock;
+
+   r = -EINVAL

[PATCH v7 2/4] KVM: x86: Add KVM exit for IOAPIC EOIs

2015-07-30 Thread Steve Rutherford
Adds KVM_EXIT_IOAPIC_EOI which allows the kernel to EOI
level-triggered IOAPIC interrupts.

Uses a per VCPU exit bitmap to decide whether or not the IOAPIC needs
to be informed (which is identical to the EOI_EXIT_BITMAP field used
by modern x86 processors, but can also be used to elide kvm IOAPIC EOI
exits on older processors).

[Note: A prototype using ResampleFDs found that decoupling the EOI
from the VCPU's thread made it possible for the VCPU to not see a
recent EOI after reentering the guest. This does not match real
hardware.]

Compile tested for Intel x86.

Signed-off-by: Steve Rutherford <srutherf...@google.com>
---
 Documentation/virtual/kvm/api.txt | 12 
 arch/x86/include/asm/kvm_host.h   |  2 ++
 arch/x86/kvm/lapic.c  | 24 +---
 arch/x86/kvm/x86.c| 11 +++
 include/linux/kvm_host.h  |  2 +-
 include/uapi/linux/kvm.h  |  5 +
 6 files changed, 48 insertions(+), 8 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index b655024..78d0ae8 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3302,6 +3302,18 @@ Valid values for 'type' are:
to ignore the request, or to gather VM memory core dump and/or
reset/shutdown of the VM.
 
+   /* KVM_EXIT_IOAPIC_EOI */
+   struct {
+   __u8 vector;
+   } eoi;
+
+Indicates that the VCPU's in-kernel local APIC received an EOI for a
+level-triggered IOAPIC interrupt.  This exit only triggers when the
+IOAPIC is implemented in userspace (i.e. KVM_CAP_SPLIT_IRQCHIP is enabled);
+the userspace IOAPIC should process the EOI and retrigger the interrupt if
+it is still asserted.  Vector is the LAPIC interrupt vector for which the
+EOI was received.
+
/* Fix the size of the union. */
char padding[256];
};
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b4fdf0c..b27f54d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -561,6 +561,8 @@ struct kvm_vcpu_arch {
struct {
bool pv_unhalted;
} pv;
+
+   int pending_ioapic_eoi;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 2f486d8..9e69296a 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -877,15 +877,25 @@ static bool kvm_ioapic_handles_vector(struct kvm_lapic 
*apic, int vector)
 
 static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
 {
-   if (kvm_ioapic_handles_vector(apic, vector)) {
-   int trigger_mode;
-   if (apic_test_vector(vector, apic->regs + APIC_TMR))
-   trigger_mode = IOAPIC_LEVEL_TRIG;
-   else
-   trigger_mode = IOAPIC_EDGE_TRIG;
+   int trigger_mode;
+
+   /* Eoi the ioapic only if the ioapic doesn't own the vector. */
+   if (!kvm_ioapic_handles_vector(apic, vector))
+   return;
 
-   kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
+   /* Request a KVM exit to inform the userspace IOAPIC. */
+   if (irqchip_split(apic->vcpu->kvm)) {
+   apic->vcpu->arch.pending_ioapic_eoi = vector;
+   kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu);
+   return;
}
+
+   if (apic_test_vector(vector, apic->regs + APIC_TMR))
+   trigger_mode = IOAPIC_LEVEL_TRIG;
+   else
+   trigger_mode = IOAPIC_EDGE_TRIG;
+
+   kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
 }
 
 static int apic_set_eoi(struct kvm_lapic *apic)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8e40ddf..4159f29 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6268,6 +6268,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_pmu_handle_event(vcpu);
if (kvm_check_request(KVM_REQ_PMI, vcpu))
kvm_pmu_deliver_pmi(vcpu);
+   if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) {
+   BUG_ON(vcpu->arch.pending_ioapic_eoi > 255);
+   if (test_bit(vcpu->arch.pending_ioapic_eoi,
+(void *) vcpu->arch.eoi_exit_bitmap)) {
+   vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI;
+   vcpu->run->eoi.vector =
+   vcpu->arch.pending_ioapic_eoi;
+   r = 0;
+   goto out;
+   }
+   }
if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
vcpu_scan_ioapic(vcpu);
if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1867b83..253717c 100644

Re: [PATCH v8 3/4] KVM: x86: Add EOI exit bitmap inference

2015-07-30 Thread Steve Rutherford
On Thu, Jul 30, 2015 at 09:43:58AM +0200, Paolo Bonzini wrote:
 
 
 On 30/07/2015 08:32, Steve Rutherford wrote:
  +u8 kvm_arch_nr_userspace_ioapic_pins(struct kvm *kvm);
   #else
   static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
   {
   }
  +static inline void kvm_arch_irq_routing_update(struct kvm *kvm)
  +{
  +}
  +static inline u8 kvm_arch_nr_userspace_ioapic_pins(struct kvm *kvm)
  +{
  +   return 0;
  +}
 
 Unused function?  I can remove it myself, but I'd first like to make
 sure that I haven't missed anything.
Jeeze, yeah, that function is totally dead.
 
 Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v9 3/4] KVM: x86: Add EOI exit bitmap inference

2015-07-30 Thread Steve Rutherford
In order to support a userspace IOAPIC interacting with an in kernel
APIC, the EOI exit bitmaps need to be configurable.

If the IOAPIC is in userspace (i.e. the irqchip has been split), the
EOI exit bitmaps will be set whenever the GSI Routes are configured.
In particular, the low MSI routes are reservable for userspace
IOAPICs. For these MSI routes, the EOI Exit bit corresponding to the
destination vector of the route will be set for the destination VCPU.

The intention is for the userspace IOAPICs to use the reservable MSI
routes to inject interrupts into the guest.

This is a slight abuse of the notion of an MSI Route, given that MSIs
classically bypass the IOAPIC. It might be worthwhile to add an
additional route type to improve clarity.

Compile tested for Intel x86.

Signed-off-by: Steve Rutherford <srutherf...@google.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/ioapic.h   |  2 ++
 arch/x86/kvm/irq_comm.c | 42 +
 arch/x86/kvm/lapic.c|  3 +--
 arch/x86/kvm/x86.c  | 11 ++-
 include/linux/kvm_host.h| 15 +++
 virt/kvm/irqchip.c  | 12 ++--
 7 files changed, 73 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b27f54d..ed896fe 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -674,6 +674,7 @@ struct kvm_arch {
u64 disabled_quirks;
 
bool irqchip_split;
+   u8 nr_reserved_ioapic_pins;
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index a8842c0..cbb06a0 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -9,6 +9,7 @@ struct kvm;
 struct kvm_vcpu;
 
 #define IOAPIC_NUM_PINS  KVM_IOAPIC_NUM_PINS
+#define MAX_NR_RESERVED_IOAPIC_PINS KVM_MAX_IRQ_ROUTES
 #define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */
 #define IOAPIC_EDGE_TRIG  0
 #define IOAPIC_LEVEL_TRIG 1
@@ -122,4 +123,5 @@ int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state 
*state);
 int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
 void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
 
+void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
 #endif
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 67f6b62..da4827f 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -335,3 +335,45 @@ int kvm_setup_empty_irq_routing(struct kvm *kvm)
 {
return kvm_set_irq_routing(kvm, empty_routing, 0, 0);
 }
+
+void kvm_arch_irq_routing_update(struct kvm *kvm)
+{
+   if (ioapic_in_kernel(kvm) || !irqchip_in_kernel(kvm))
+   return;
+   kvm_make_scan_ioapic_request(kvm);
+}
+
+void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
+{
+   struct kvm *kvm = vcpu->kvm;
+   struct kvm_kernel_irq_routing_entry *entry;
+   struct kvm_irq_routing_table *table;
+   u32 i, nr_ioapic_pins;
+   int idx;
+
+   /* kvm->irq_routing must be read after clearing
+* KVM_SCAN_IOAPIC. */
+   smp_mb();
+   idx = srcu_read_lock(&kvm->irq_srcu);
+   table = kvm->irq_routing;
+   nr_ioapic_pins = min_t(u32, table->nr_rt_entries,
+  kvm->arch.nr_reserved_ioapic_pins);
+   for (i = 0; i < nr_ioapic_pins; ++i) {
+   hlist_for_each_entry(entry, &table->map[i], link) {
+   u32 dest_id, dest_mode;
+
+   if (entry->type != KVM_IRQ_ROUTING_MSI)
+   continue;
+   dest_id = (entry->msi.address_lo >> 12) & 0xff;
+   dest_mode = (entry->msi.address_lo >> 2) & 0x1;
+   if (kvm_apic_match_dest(vcpu, NULL, 0, dest_id,
+   dest_mode)) {
+   u32 vector = entry->msi.data & 0xff;
+
+   __set_bit(vector,
+ (unsigned long *) eoi_exit_bitmap);
+   }
+   }
+   }
+   srcu_read_unlock(&kvm->irq_srcu, idx);
+}
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 9e69296a..e7566f1 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -209,8 +209,7 @@ out:
if (old)
kfree_rcu(old, rcu);
 
-   if (!irqchip_split(kvm))
-   kvm_vcpu_request_scan_ioapic(kvm);
+   kvm_make_scan_ioapic_request(kvm);
 }
 
 static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4159f29..f9756e7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3572,6 +3572,11 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
r = -EEXIST;
if (irqchip_in_kernel(kvm))
goto split_irqchip_unlock;
+
+   r = -EINVAL

Re: [PATCH v7 1/4] KVM: x86: Split the APIC from the rest of IRQCHIP.

2015-07-30 Thread Steve Rutherford
On Thu, Jul 30, 2015 at 10:21:54AM +0200, Paolo Bonzini wrote:
 Please review this diff:
 
 diff --git b/arch/x86/kvm/irq.h a/arch/x86/kvm/irq.h
 index 72af5a989a2e..975cf33ef306 100644
 --- b/arch/x86/kvm/irq.h
 +++ a/arch/x86/kvm/irq.h
 @@ -93,11 +90,11 @@ static inline int irqchip_split(struct kvm *kvm)
   bool ret;
   struct kvm_pic *vpic = pic_irqchip(kvm);
  
 - /* Read vpic before kvm-irq_routing.  */
 - smp_rmb();
   ret = (vpic != NULL);
   ret |= irqchip_split(kvm);
  
 + /* Read vpic before kvm-irq_routing.  */
 + smp_rmb();
   return ret;
  }
  
 diff --git b/arch/x86/kvm/lapic.c a/arch/x86/kvm/lapic.c
 index 2f486d8ecdae..a86324ca9cc3 100644
 --- b/arch/x86/kvm/lapic.c
 +++ a/arch/x86/kvm/lapic.c
 @@ -209,7 +209,7 @@ out:
   if (old)
   kfree_rcu(old, rcu);
  
 - if (!irqchip_split(kvm))
 + if (ioapic_in_kernel(kvm))
   kvm_vcpu_request_scan_ioapic(kvm);
  }
  
 @@ -1846,7 +1846,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
   kvm_x86_ops-hwapic_isr_update(vcpu-kvm,
   apic_find_highest_isr(apic));
   kvm_make_request(KVM_REQ_EVENT, vcpu);
 - if (!ioapic_in_kernel(vcpu-kvm))
 + if (ioapic_in_kernel(vcpu-kvm))
   kvm_rtc_eoi_tracking_restore_one(vcpu);
  }
  
 diff --git b/arch/x86/kvm/x86.c a/arch/x86/kvm/x86.c
 index 49a98608e3f6..5d2b8695732c 100644
 --- b/arch/x86/kvm/x86.c
 +++ a/arch/x86/kvm/x86.c
 @@ -3573,16 +3573,14 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
   r = -EEXIST;
   if (irqchip_in_kernel(kvm))
   goto split_irqchip_unlock;
 - if (!irqchip_split(kvm)) {
 - if (atomic_read(kvm-online_vcpus))
 - goto split_irqchip_unlock;
 - r = kvm_setup_empty_irq_routing(kvm);
 - if (r)
 - goto split_irqchip_unlock;
 - /* Pairs with irqchip_in_kernel. */
 - smp_wmb();
 - kvm-arch.irqchip_split = true;
 - }
 + if (atomic_read(kvm-online_vcpus))
 + goto split_irqchip_unlock;
 + r = kvm_setup_empty_irq_routing(kvm);
 + if (r)
 + goto split_irqchip_unlock;
 + /* Pairs with irqchip_in_kernel. */
 + smp_wmb();
 + kvm-arch.irqchip_split = true;
This looks a bit non-sensical, but is overprepared for the introduction
of IOAPIC hotplug, which is two patches down the line. Changing it is fine,
you'll just need to merge the very same change back.

   r = 0;
  split_irqchip_unlock:
   mutex_unlock(kvm-lock);
 @@ -3701,7 +3699,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
   }
  
   r = -ENXIO;
 - if (!irqchip_in_kernel(kvm) || !ioapic_in_kernel(kvm))
 + if (!irqchip_in_kernel(kvm) || irqchip_split(kvm))
   goto get_irqchip_out;
   r = kvm_vm_ioctl_get_irqchip(kvm, chip);
   if (r)
 @@ -3725,7 +3723,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
   }
  
   r = -ENXIO;
 - if (!irqchip_in_kernel(kvm) || !ioapic_in_kernel(kvm))
 + if (!irqchip_in_kernel(kvm) || irqchip_split(kvm))
   goto set_irqchip_out;
   r = kvm_vm_ioctl_set_irqchip(kvm, chip);
   if (r)
 
 No need to resend.
 
 Paolo

Looks good.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 3/4] KVM: x86: Add EOI exit bitmap inference

2015-07-30 Thread Steve Rutherford
On Thu, Jul 30, 2015 at 08:23:43AM +0200, Jan Kiszka wrote:
 On 2015-07-29 22:27, Steve Rutherford wrote:
  On Wed, Jul 29, 2015 at 02:38:09PM +0200, Paolo Bonzini wrote:
 
 
  On 28/07/2015 01:17, Steve Rutherford wrote:
  diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
  index d8cc54b..f6ce112 100644
  --- a/arch/x86/kvm/ioapic.h
  +++ b/arch/x86/kvm/ioapic.h
  @@ -9,6 +9,7 @@ struct kvm;
   struct kvm_vcpu;
   
   #define IOAPIC_NUM_PINS  KVM_IOAPIC_NUM_PINS
  +#define MAX_NR_RESERVED_IOAPIC_PINS 48
 
  Why is this needed?
  This constant is used to bound the number of IOAPIC pins that are
  reservable when enabling KVM_CAP_SPLIT_IRQCHIP. IIRC, x86 doesn't
  support more than 2 IOAPICs.  
 
 Huh? Surely not. I've already seen boxes with at least three, and I
 think you can even hot-plug them today via extension cards. Not saying
 that QEMU supports that already, even without KVM, but we must not limit
 ourselves in the kernel API.
 
 So please remove such a static limit on how many IOAPICs userspace can
 emulate or raise it to something sufficiently large that will last long
 enough.
I'll go with the latter. I'll set it to the same size as the max size of the
GSI routing table, which needs to upper bound it.

 
 Jan
 
 -- 
 Siemens AG, Corporate Technology, CT RTC ITP SES-DE
 Corporate Competence Center Embedded Linux
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v8 3/4] KVM: x86: Add EOI exit bitmap inference

2015-07-30 Thread Steve Rutherford
In order to support a userspace IOAPIC interacting with an in kernel
APIC, the EOI exit bitmaps need to be configurable.

If the IOAPIC is in userspace (i.e. the irqchip has been split), the
EOI exit bitmaps will be set whenever the GSI Routes are configured.
In particular, the low MSI routes are reservable for userspace
IOAPICs. For these MSI routes, the EOI Exit bit corresponding to the
destination vector of the route will be set for the destination VCPU.

The intention is for the userspace IOAPICs to use the reservable MSI
routes to inject interrupts into the guest.

This is a slight abuse of the notion of an MSI Route, given that MSIs
classically bypass the IOAPIC. It might be worthwhile to add an
additional route type to improve clarity.

Compile tested for Intel x86.

Signed-off-by: Steve Rutherford <srutherf...@google.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/ioapic.h   |  2 ++
 arch/x86/kvm/irq_comm.c | 42 +
 arch/x86/kvm/lapic.c|  3 +--
 arch/x86/kvm/x86.c  | 11 ++-
 include/linux/kvm_host.h| 20 
 virt/kvm/irqchip.c  | 12 ++--
 7 files changed, 78 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b27f54d..ed896fe 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -674,6 +674,7 @@ struct kvm_arch {
u64 disabled_quirks;
 
bool irqchip_split;
+   u8 nr_reserved_ioapic_pins;
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index a8842c0..cbb06a0 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -9,6 +9,7 @@ struct kvm;
 struct kvm_vcpu;
 
 #define IOAPIC_NUM_PINS  KVM_IOAPIC_NUM_PINS
+#define MAX_NR_RESERVED_IOAPIC_PINS KVM_MAX_IRQ_ROUTES
 #define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */
 #define IOAPIC_EDGE_TRIG  0
 #define IOAPIC_LEVEL_TRIG 1
@@ -122,4 +123,5 @@ int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state 
*state);
 int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
 void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
 
+void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
 #endif
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 67f6b62..da4827f 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -335,3 +335,45 @@ int kvm_setup_empty_irq_routing(struct kvm *kvm)
 {
return kvm_set_irq_routing(kvm, empty_routing, 0, 0);
 }
+
+void kvm_arch_irq_routing_update(struct kvm *kvm)
+{
+   if (ioapic_in_kernel(kvm) || !irqchip_in_kernel(kvm))
+   return;
+   kvm_make_scan_ioapic_request(kvm);
+}
+
+void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
+{
+   struct kvm *kvm = vcpu-kvm;
+   struct kvm_kernel_irq_routing_entry *entry;
+   struct kvm_irq_routing_table *table;
+   u32 i, nr_ioapic_pins;
+   int idx;
+
+   /* kvm-irq_routing must be read after clearing
+* KVM_SCAN_IOAPIC. */
+   smp_mb();
+   idx = srcu_read_lock(kvm-irq_srcu);
+   table = kvm-irq_routing;
+   nr_ioapic_pins = min_t(u32, table-nr_rt_entries,
+  kvm-arch.nr_reserved_ioapic_pins);
+   for (i = 0; i  nr_ioapic_pins; ++i) {
+   hlist_for_each_entry(entry, table-map[i], link) {
+   u32 dest_id, dest_mode;
+
+   if (entry-type != KVM_IRQ_ROUTING_MSI)
+   continue;
+   dest_id = (entry-msi.address_lo  12)  0xff;
+   dest_mode = (entry-msi.address_lo  2)  0x1;
+   if (kvm_apic_match_dest(vcpu, NULL, 0, dest_id,
+   dest_mode)) {
+   u32 vector = entry-msi.data  0xff;
+
+   __set_bit(vector,
+ (unsigned long *) eoi_exit_bitmap);
+   }
+   }
+   }
+   srcu_read_unlock(kvm-irq_srcu, idx);
+}
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 9e69296a..e7566f1 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -209,8 +209,7 @@ out:
if (old)
kfree_rcu(old, rcu);
 
-   if (!irqchip_split(kvm))
-   kvm_vcpu_request_scan_ioapic(kvm);
+   kvm_make_scan_ioapic_request(kvm);
 }
 
 static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4159f29..f9756e7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3572,6 +3572,11 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
r = -EEXIST;
if (irqchip_in_kernel(kvm))
goto split_irqchip_unlock;
+
+   r

[PATCH v7 4/4] KVM: x86: Add support for local interrupt requests from userspace

2015-07-30 Thread Steve Rutherford
In order to enable userspace PIC support, the userspace PIC needs to
be able to inject local interrupts even when the APICs are in the
kernel.

KVM_INTERRUPT now supports sending local interrupts to an APIC when
APICs are in the kernel.

The ready_for_interrupt_injection flag is now only set when the CPU/APIC
will immediately accept and inject an interrupt (i.e. APIC has not
masked the PIC).

When the PIC wishes to initiate an INTA cycle with, say, CPU0, it
kicks CPU0 out of the guest, and rendezvouses with CPU0 once it arrives
in userspace.

When the CPU/APIC unmasks the PIC, a KVM_EXIT_IRQ_WINDOW_OPEN is
triggered, so that userspace has a chance to inject a PIC interrupt
if it had been pending.

Overall, this design can lead to a small number of spurious userspace
rendezvous. In particular, whenever the PIC transitions from low to
high while it is masked and whenever the PIC becomes unmasked while
it is low.

Note: this does not buffer more than one local interrupt in the
kernel, so the VMM needs to enter the guest in order to complete
interrupt injection before injecting an additional interrupt.

Compiles for x86.

Can pass the KVM Unit Tests.

Signed-off-by: Steve Rutherford srutherf...@google.com
---
 Documentation/virtual/kvm/api.txt | 14 +
 arch/x86/include/asm/kvm_host.h   |  1 +
 arch/x86/kvm/irq.c| 38 +
 arch/x86/kvm/irq.h|  8 +++
 arch/x86/kvm/x86.c| 44 ++-
 5 files changed, 82 insertions(+), 23 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index 78d0ae8..4de4286 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -401,10 +401,9 @@ Capability: basic
 Architectures: x86, ppc, mips
 Type: vcpu ioctl
 Parameters: struct kvm_interrupt (in)
-Returns: 0 on success, -1 on error
+Returns: 0 on success, negative on failure.
 
-Queues a hardware interrupt vector to be injected.  This is only
-useful if in-kernel local APIC or equivalent is not used.
+Queues a hardware interrupt vector to be injected.
 
 /* for KVM_INTERRUPT */
 struct kvm_interrupt {
@@ -414,7 +413,14 @@ struct kvm_interrupt {
 
 X86:
 
-Note 'irq' is an interrupt vector, not an interrupt pin or line.
+Returns: 0 on success,
+-EEXIST if an interrupt is already enqueued
+-EINVAL if the irq number is invalid
+-ENXIO if the PIC is in the kernel
+-EFAULT if the pointer is invalid
+
+Note 'irq' is an interrupt vector, not an interrupt pin or line. This
+ioctl is useful if the in-kernel PIC is not used.
 
 PPC:
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ed896fe..33201c6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -563,6 +563,7 @@ struct kvm_vcpu_arch {
} pv;
 
int pending_ioapic_eoi;
+   int pending_external_vector;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index a1ec6a50..5fa0e6f 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -38,14 +38,27 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
 
 /*
+ * check if there is a pending userspace external interrupt
+ */
+static int pending_userspace_extint(struct kvm_vcpu *v)
+{
+   return v-arch.pending_external_vector != -1;
+}
+
+/*
  * check if there is pending interrupt from
  * non-APIC source without intack.
  */
 static int kvm_cpu_has_extint(struct kvm_vcpu *v)
 {
-   if (kvm_apic_accept_pic_intr(v))
-   return pic_irqchip(v-kvm)-output; /* PIC */
-   else
+   u8 accept = kvm_apic_accept_pic_intr(v);
+
+   if (accept) {
+   if (irqchip_split(v-kvm))
+   return pending_userspace_extint(v);
+   else
+   return pic_irqchip(v-kvm)-output;
+   } else
return 0;
 }
 
@@ -57,7 +70,7 @@ static int kvm_cpu_has_extint(struct kvm_vcpu *v)
  */
 int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
 {
-   if (!irqchip_in_kernel(v-kvm))
+   if (!pic_in_kernel(v-kvm))
return v-arch.interrupt.pending;
 
if (kvm_cpu_has_extint(v))
@@ -75,7 +88,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
  */
 int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
 {
-   if (!irqchip_in_kernel(v-kvm))
+   if (!pic_in_kernel(v-kvm))
return v-arch.interrupt.pending;
 
if (kvm_cpu_has_extint(v))
@@ -91,9 +104,16 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
  */
 static int kvm_cpu_get_extint(struct kvm_vcpu *v)
 {
-   if (kvm_cpu_has_extint(v))
-   return kvm_pic_read_irq(v-kvm); /* PIC */
-   return -1;
+   if (kvm_cpu_has_extint(v)) {
+   if (irqchip_split(v-kvm)) {
+   int vector = v-arch.pending_external_vector

Re: [PATCH 1/2] KVM: x86: set TMR when the interrupt is accepted

2015-07-30 Thread Steve Rutherford
On Thu, Jul 30, 2015 at 11:26:28PM +, Zhang, Yang Z wrote:
 Paolo Bonzini wrote on 2015-07-29:
  Do not compute TMR in advance.  Instead, set the TMR just before the
  interrupt is accepted into the IRR.  This limits the coupling between
  IOAPIC and LAPIC.
  
 
 Uh... it goes back to the original way, which is wrong. You cannot modify the apic 
 page (here, the TMR reg) directly when the corresponding VMCS may be in use at 
 the same time.
Oh... Yeah. That's a damn good point, given that the interrupt can be injected 
from another thread while one is in that guest vcpu. 

Easiest time to update the TMR should be on guest entry through 
vcpu_scan_ioapic, as before. 

The best way to go is probably to ditch the new per vcpu EOI exit bitmap, and 
just update/use the TMR. There's no reason to duplicate that data in the 
representation of the apic (I suspect that the duplication was inspired by my 
mistaken notion of the TMR). The IOAPIC exit check can use the TMR instead. 

Based upon my reading of the SDM, the only reason that the eoi exit bitmaps are 
not the exact same as the TMR is that it is possible to have virtual-interrupt 
delivery enabled /without/ an apic access page (Note: V-ID = EOI exit bitmap 
enabled).

Yang, do you happen to know if that is the case?

[Note: Just looked back at the old code for updating the EOI exit bitmaps, 
which for some reason was configured to trigger EOI exits for all IOAPIC 
interrupts, not just level-triggered IOAPIC interrupts. Which is weird, and I 
believe totally unnecessary.]


 
 
  Signed-off-by: Paolo Bonzini pbonz...@redhat.com
  ---
   arch/x86/kvm/ioapic.c |  9 ++---
   arch/x86/kvm/ioapic.h |  3 +--
   arch/x86/kvm/lapic.c  | 19 ++-
   arch/x86/kvm/lapic.h  |  1 -
   arch/x86/kvm/x86.c|  5 +
   5 files changed, 14 insertions(+), 23 deletions(-)
  diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
  index 856f79105bb5..eaf4ec38d980 100644
  --- a/arch/x86/kvm/ioapic.c
  +++ b/arch/x86/kvm/ioapic.c
  @@ -246,8 +246,7 @@ static void update_handled_vectors(struct kvm_ioapic
  *ioapic)
  smp_wmb();
   }
  -void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
  -   u32 *tmr)
  +void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
   {
  struct kvm_ioapic *ioapic = vcpu-kvm-arch.vioapic;
  union kvm_ioapic_redirect_entry *e;
  @@ -260,13 +259,9 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu,
  u64 *eoi_exit_bitmap,
  kvm_irq_has_notifier(ioapic-kvm, KVM_IRQCHIP_IOAPIC, 
  index) ||
  index == RTC_GSI) { if 
  (kvm_apic_match_dest(vcpu, NULL, 0,
  -   e-fields.dest_id, e-fields.dest_mode)) {
  +   e-fields.dest_id, e-fields.dest_mode))
  __set_bit(e-fields.vector,
  (unsigned long *)eoi_exit_bitmap);
  -   if (e-fields.trig_mode == IOAPIC_LEVEL_TRIG)
  -   __set_bit(e-fields.vector, -   
  (unsigned long *)tmr); -
  }
  }
  }
  spin_unlock(ioapic-lock);
  diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
  index ca0b0b4e6256..3dbd0e2aac4e 100644
  --- a/arch/x86/kvm/ioapic.h
  +++ b/arch/x86/kvm/ioapic.h
  @@ -120,7 +120,6 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct
  kvm_lapic *src,
  struct kvm_lapic_irq *irq, unsigned long *dest_map);
   int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
   int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
  -void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
  -   u32 *tmr);
  +void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
  
   #endif
  diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
  index 2a5ca97c263b..9be64c77d6db 100644
  --- a/arch/x86/kvm/lapic.c
  +++ b/arch/x86/kvm/lapic.c
  @@ -551,15 +551,6 @@ static void pv_eoi_clr_pending(struct kvm_vcpu
  *vcpu)
  __clear_bit(KVM_APIC_PV_EOI_PENDING, vcpu-arch.apic_attention);
   }
  -void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr)
  -{
  -   struct kvm_lapic *apic = vcpu-arch.apic;
  -   int i;
  -
  -   for (i = 0; i  8; i++)
  -   apic_set_reg(apic, APIC_TMR + 0x10 * i, tmr[i]);
  -}
  -
   static void apic_update_ppr(struct kvm_lapic *apic)
   {
  u32 tpr, isrv, ppr, old_ppr;
  @@ -781,6 +772,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int
  delivery_mode,
  case APIC_DM_LOWEST:
  vcpu-arch.apic_arb_prio++;
  case APIC_DM_FIXED:
  +   if (unlikely(trig_mode  !level))
  +   break;
  +
  /* FIXME add logic for vcpu on reset */
  if (unlikely(!apic_enabled(apic)))
  break;
  @@ -790,6 +784,13 @@ static int 

Re: [PATCH v5 3/4] KVM: x86: Add EOI exit bitmap inference

2015-07-29 Thread Steve Rutherford
On Wed, Jul 29, 2015 at 02:38:09PM +0200, Paolo Bonzini wrote:
 
 
 On 28/07/2015 01:17, Steve Rutherford wrote:
  diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
  index d8cc54b..f6ce112 100644
  --- a/arch/x86/kvm/ioapic.h
  +++ b/arch/x86/kvm/ioapic.h
  @@ -9,6 +9,7 @@ struct kvm;
   struct kvm_vcpu;
   
   #define IOAPIC_NUM_PINS  KVM_IOAPIC_NUM_PINS
  +#define MAX_NR_RESERVED_IOAPIC_PINS 48
 
 Why is this needed?
This constant is used to bound the number of IOAPIC pins that are
reservable when enabling KVM_CAP_SPLIT_IRQCHIP. IIRC, x86 doesn't
support more than 2 IOAPICs.  

 
 Paolo
 
   #define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */
   #define IOAPIC_EDGE_TRIG  0
   #define IOAPIC_LEVEL_TRIG 1
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 1/4] KVM: x86: Split the APIC from the rest of IRQCHIP.

2015-07-29 Thread Steve Rutherford
On Wed, Jul 29, 2015 at 02:56:42PM +0200, Paolo Bonzini wrote:
 
  +   kvm_rtc_eoi_tracking_restore_one(vcpu);
   }
   
   void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
  @@ -1921,7 +1923,8 @@ static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu 
  *vcpu,
  /* Cache not set: could be safe but we don't bother. */
  apic-highest_isr_cache == -1 ||
  /* Need EOI to update ioapic. */
  -   kvm_ioapic_handles_vector(vcpu-kvm, apic-highest_isr_cache)) {
  +   kvm_ioapic_handles_vector(vcpu-kvm, apic-highest_isr_cache) ||
  +   irqchip_split(vcpu-kvm)) {
 
 This is ugly (and if anything irqchip_split should be done before
 kvm_ioapic_handles_vector).  Could this just test the EOI exit bitmap
 instead?
 
That could be done. The EOI exit bitmap write paths for split and !split
would need to be consolidated. (We can't pull them from the VMCS, so we'd
need to fetch them from the one stored in kvm_vcpu).

 Also, who sets TMR in the split irqchip case?  I'll post a patch today
 or tomorrow to compute TMR in __apic_accept_irq and to do the
 aforementioned EOI exit bitmap test.
Another option would be to compute the TMR in vcpu_scan_ioapic, by
extracting it from the EOI exit bitmaps (which would be most similar
to how it had been done previously), but I prefer computing it in
__apic_accept_irq.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] KVM: x86: set TMR when the interrupt is accepted

2015-07-29 Thread Steve Rutherford
On Wed, Jul 29, 2015 at 03:37:34PM +0200, Paolo Bonzini wrote:
 Do not compute TMR in advance.  Instead, set the TMR just before the interrupt
 is accepted into the IRR.  This limits the coupling between IOAPIC and LAPIC.
 
 Signed-off-by: Paolo Bonzini pbonz...@redhat.com
 ---
  arch/x86/kvm/ioapic.c |  9 ++---
  arch/x86/kvm/ioapic.h |  3 +--
  arch/x86/kvm/lapic.c  | 19 ++-
  arch/x86/kvm/lapic.h  |  1 -
  arch/x86/kvm/x86.c|  5 +
  5 files changed, 14 insertions(+), 23 deletions(-)
 
 diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
 index 856f79105bb5..eaf4ec38d980 100644
 --- a/arch/x86/kvm/ioapic.c
 +++ b/arch/x86/kvm/ioapic.c
 @@ -246,8 +246,7 @@ static void update_handled_vectors(struct kvm_ioapic 
 *ioapic)
   smp_wmb();
  }
  
 -void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
 - u32 *tmr)
 +void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
  {
   struct kvm_ioapic *ioapic = vcpu-kvm-arch.vioapic;
   union kvm_ioapic_redirect_entry *e;
 @@ -260,13 +259,9 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 
 *eoi_exit_bitmap,
   kvm_irq_has_notifier(ioapic-kvm, KVM_IRQCHIP_IOAPIC, 
 index) ||
   index == RTC_GSI) {
   if (kvm_apic_match_dest(vcpu, NULL, 0,
 - e-fields.dest_id, e-fields.dest_mode)) {
 + e-fields.dest_id, e-fields.dest_mode))
   __set_bit(e-fields.vector,
   (unsigned long *)eoi_exit_bitmap);
 - if (e-fields.trig_mode == IOAPIC_LEVEL_TRIG)
 - __set_bit(e-fields.vector,
 - (unsigned long *)tmr);
 - }
   }
   }
   spin_unlock(ioapic-lock);
 diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
 index ca0b0b4e6256..3dbd0e2aac4e 100644
 --- a/arch/x86/kvm/ioapic.h
 +++ b/arch/x86/kvm/ioapic.h
 @@ -120,7 +120,6 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct 
 kvm_lapic *src,
   struct kvm_lapic_irq *irq, unsigned long *dest_map);
  int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
  int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
 -void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
 - u32 *tmr);
 +void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
  
  #endif
 diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
 index 2a5ca97c263b..9be64c77d6db 100644
 --- a/arch/x86/kvm/lapic.c
 +++ b/arch/x86/kvm/lapic.c
 @@ -551,15 +551,6 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
   __clear_bit(KVM_APIC_PV_EOI_PENDING, vcpu-arch.apic_attention);
  }
  
 -void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr)
 -{
 - struct kvm_lapic *apic = vcpu-arch.apic;
 - int i;
 -
 - for (i = 0; i  8; i++)
 - apic_set_reg(apic, APIC_TMR + 0x10 * i, tmr[i]);
 -}
 -
  static void apic_update_ppr(struct kvm_lapic *apic)
  {
   u32 tpr, isrv, ppr, old_ppr;
 @@ -781,6 +772,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int 
 delivery_mode,
   case APIC_DM_LOWEST:
   vcpu-arch.apic_arb_prio++;
   case APIC_DM_FIXED:
 + if (unlikely(trig_mode  !level))
 + break;
 +
   /* FIXME add logic for vcpu on reset */
   if (unlikely(!apic_enabled(apic)))
   break;
 @@ -790,6 +784,13 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int 
 delivery_mode,
   if (dest_map)
   __set_bit(vcpu-vcpu_id, dest_map);
  
 + if (apic_test_vector(vector, apic-regs + APIC_TMR) != 
 !!trig_mode) {
 + if (trig_mode)
 + apic_set_vector(vector, apic-regs + APIC_TMR);
 + else
 + apic_clear_vector(vector, apic-regs + 
 APIC_TMR);
 + }
 +
   if (kvm_x86_ops-deliver_posted_interrupt)
   kvm_x86_ops-deliver_posted_interrupt(vcpu, vector);
   else {
 diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
 index 764037991d26..eb46d6bcaa75 100644
 --- a/arch/x86/kvm/lapic.h
 +++ b/arch/x86/kvm/lapic.h
 @@ -57,7 +57,6 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
  u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
  void kvm_apic_set_version(struct kvm_vcpu *vcpu);
  
 -void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr);
  void __kvm_apic_update_irr(u32 *pir, void *regs);
  void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
  int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
 index 23e47a0b054b..48dc954542db 100644
 --- 

Re: [PATCH 2/2] KVM: x86: store IOAPIC-handled vectors in each VCPU

2015-07-29 Thread Steve Rutherford
On Wed, Jul 29, 2015 at 03:37:35PM +0200, Paolo Bonzini wrote:
 We can reuse the algorithm that computes the EOI exit bitmap to figure
 out which vectors are handled by the IOAPIC.  The only difference
 between the two is for edge-triggered interrupts other than IRQ8
 that have no notifiers active; however, the IOAPIC does not have to
 do anything special for these interrupts anyway.
 
 This again limits the interactions between the IOAPIC and the LAPIC,
 making it easier to move the former to userspace.
 
 Inspired by a patch from Steve Rutherford.
 
 Signed-off-by: Paolo Bonzini pbonz...@redhat.com
 ---
  arch/x86/include/asm/kvm_host.h |  3 ++-
  arch/x86/kvm/ioapic.c   | 18 ++
  arch/x86/kvm/ioapic.h   |  8 
  arch/x86/kvm/lapic.c| 10 --
  arch/x86/kvm/svm.c  |  2 +-
  arch/x86/kvm/vmx.c  |  3 ++-
  arch/x86/kvm/x86.c  |  8 +++-
  7 files changed, 18 insertions(+), 34 deletions(-)
 
 diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
 index 2f9e504f9f0c..d0e991ef6ef0 100644
 --- a/arch/x86/include/asm/kvm_host.h
 +++ b/arch/x86/include/asm/kvm_host.h
 @@ -383,6 +383,7 @@ struct kvm_vcpu_arch {
   u64 efer;
   u64 apic_base;
   struct kvm_lapic *apic;/* kernel irqchip context */
 + u64 eoi_exit_bitmap[4];
   unsigned long apic_attention;
   int32_t apic_arb_prio;
   int mp_state;
 @@ -808,7 +809,7 @@ struct kvm_x86_ops {
   int (*vm_has_apicv)(struct kvm *kvm);
   void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
   void (*hwapic_isr_update)(struct kvm *kvm, int isr);
 - void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
 + void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu);
   void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
   void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
   void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
 diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
 index eaf4ec38d980..2dcda0f188ba 100644
 --- a/arch/x86/kvm/ioapic.c
 +++ b/arch/x86/kvm/ioapic.c
 @@ -233,19 +233,6 @@ static void kvm_ioapic_inject_all(struct kvm_ioapic 
 *ioapic, unsigned long irr)
  }
  
  
 -static void update_handled_vectors(struct kvm_ioapic *ioapic)
 -{
 - DECLARE_BITMAP(handled_vectors, 256);
 - int i;
 -
 - memset(handled_vectors, 0, sizeof(handled_vectors));
 - for (i = 0; i  IOAPIC_NUM_PINS; ++i)
 - __set_bit(ioapic-redirtbl[i].fields.vector, handled_vectors);
 - memcpy(ioapic-handled_vectors, handled_vectors,
 -sizeof(handled_vectors));
 - smp_wmb();
 -}
 -
  void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
  {
   struct kvm_ioapic *ioapic = vcpu-kvm-arch.vioapic;
 @@ -310,7 +297,6 @@ static void ioapic_write_indirect(struct kvm_ioapic 
 *ioapic, u32 val)
   e-bits |= (u32) val;
   e-fields.remote_irr = 0;
   }
 - update_handled_vectors(ioapic);
   mask_after = e-fields.mask;
   if (mask_before != mask_after)
   kvm_fire_mask_notifiers(ioapic-kvm, 
 KVM_IRQCHIP_IOAPIC, index, mask_after);
 @@ -594,7 +580,6 @@ static void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
   ioapic-id = 0;
   memset(ioapic-irq_eoi, 0x00, IOAPIC_NUM_PINS);
   rtc_irq_eoi_tracking_reset(ioapic);
 - update_handled_vectors(ioapic);
  }
  
  static const struct kvm_io_device_ops ioapic_mmio_ops = {
 @@ -623,8 +608,10 @@ int kvm_ioapic_init(struct kvm *kvm)
   if (ret  0) {
   kvm-arch.vioapic = NULL;
   kfree(ioapic);
 + return ret;
   }
  
 + kvm_vcpu_request_scan_ioapic(kvm);
   return ret;
  }
  
 @@ -661,7 +648,6 @@ int kvm_set_ioapic(struct kvm *kvm, struct 
 kvm_ioapic_state *state)
   memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
   ioapic-irr = 0;
   ioapic-irr_delivered = 0;
 - update_handled_vectors(ioapic);
   kvm_vcpu_request_scan_ioapic(kvm);
   kvm_ioapic_inject_all(ioapic, state-irr);
   spin_unlock(ioapic-lock);
 diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
 index 3dbd0e2aac4e..bf36d66a1951 100644
 --- a/arch/x86/kvm/ioapic.h
 +++ b/arch/x86/kvm/ioapic.h
 @@ -73,7 +73,6 @@ struct kvm_ioapic {
   struct kvm *kvm;
   void (*ack_notifier)(void *opaque, int irq);
   spinlock_t lock;
 - DECLARE_BITMAP(handled_vectors, 256);
   struct rtc_status rtc_status;
   struct delayed_work eoi_inject;
   u32 irq_eoi[IOAPIC_NUM_PINS];
 @@ -98,13 +97,6 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm 
 *kvm)
   return kvm-arch.vioapic;
  }
  
 -static inline bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector)
 -{
 - struct kvm_ioapic *ioapic = kvm-arch.vioapic;
 - smp_rmb();
 - return

Re: [PATCH kvm-unit-tests] x86: ioapic: add tests around retriggering of level interrupts

2015-07-29 Thread Steve Rutherford
On Wed, Jul 29, 2015 at 03:28:13PM +0200, Paolo Bonzini wrote:
 Test resampling of level interrupts after EOI, by leaving the IRQ
 line set in the ISR.  One test does reset the IRQ line after a while,
 the other uses masking instead in the ISR.
 
 Signed-off-by: Paolo Bonzini pbonz...@redhat.com
 ---
  x86/ioapic.c | 51 +++
  1 file changed, 51 insertions(+)
 
 diff --git a/x86/ioapic.c b/x86/ioapic.c
 index 1fcf67e..d43d5c1 100644
 --- a/x86/ioapic.c
 +++ b/x86/ioapic.c
 @@ -188,6 +188,31 @@ static void test_ioapic_level_sequential(void)
   report(sequential level interrupts, g_isr_99 == 2);
  }
  
 +static volatile int g_isr_9a;
 +
 +static void ioapic_isr_9a(isr_regs_t *regs)
 +{
 + ++g_isr_9a;
 + if (g_isr_9a == 2)
 + set_irq_line(0x0e, 0);
 + eoi();
 +}
 +
 +static void test_ioapic_level_retrigger(void)
 +{
 + handle_irq(0x9a, ioapic_isr_9a);
 + set_ioapic_redir(0x0e, 0x9a, LEVEL_TRIGGERED);
 +
 + asm volatile (cli);
 + set_irq_line(0x0e, 1);
 + while (g_isr_9a != 2)
 + asm volatile (sti; hlt; cli);
This seems sketchy. The test should be able to exit this and fail.

Steve
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/4] KVM: x86: clean/fix memory barriers in irqchip_in_kernel

2015-07-29 Thread Steve Rutherford
On Wed, Jul 29, 2015 at 03:28:58PM +0200, Paolo Bonzini wrote:
 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
 index 2d62229aac26..23e47a0b054b 100644
 --- a/arch/x86/kvm/x86.c
 +++ b/arch/x86/kvm/x86.c
 @@ -3626,30 +3626,25 @@ long kvm_arch_vm_ioctl(struct file *filp,
   r = kvm_ioapic_init(kvm);
   if (r) {
   mutex_lock(kvm-slots_lock);
 - kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
 -   vpic-dev_master);
 - kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
 -   vpic-dev_slave);
 - kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
 -   vpic-dev_eclr);
 + kvm_destroy_pic(vpic);
   mutex_unlock(kvm-slots_lock);
 - kfree(vpic);
   goto create_irqchip_unlock;
   }
   } else
   goto create_irqchip_unlock;
 - smp_wmb();
 - kvm-arch.vpic = vpic;
 - smp_wmb();
   r = kvm_setup_default_irq_routing(kvm);
   if (r) {
   mutex_lock(kvm-slots_lock);
   mutex_lock(kvm-irq_lock);
   kvm_ioapic_destroy(kvm);
 - kvm_destroy_pic(kvm);
 + kvm_destroy_pic(vpic);
   mutex_unlock(kvm-irq_lock);
   mutex_unlock(kvm-slots_lock);
 + goto create_irqchip_unlock;
   }
 + /* Write kvm-irq_routing before kvm-arch.vpic.  */
 + smp_wmb();
I assume this pairs with irqchip_in_kernel? 
 + kvm-arch.vpic = vpic;
   create_irqchip_unlock:
   mutex_unlock(kvm-lock);
   break;
 -- 
 1.8.3.1
 
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/4] KVM: i8254: remove unnecessary irqchip_in_kernel check

2015-07-29 Thread Steve Rutherford
On Wed, Jul 29, 2015 at 03:28:57PM +0200, Paolo Bonzini wrote:
 The PIT is only created if irqchip_in_kernel returns true, so the
 check is superfluous.
I poked around. Looks to me like the existence of an IOAPIC is not
checked on the creation of the in-kernel PIT. Userspace might limit itself to
that scenario (PIT implies IOAPIC in-kernel), but that isn't enforced at PIT
creation.

It's worth adding that check in.

 
 Signed-off-by: Paolo Bonzini pbonz...@redhat.com
 ---
  arch/x86/kvm/i8254.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)
 
 diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
 index f90952f64e79..f588eb7bdf45 100644
 --- a/arch/x86/kvm/i8254.c
 +++ b/arch/x86/kvm/i8254.c
 @@ -333,7 +333,7 @@ static void create_pit_timer(struct kvm *kvm, u32 val, 
 int is_period)
   struct kvm_kpit_state *ps = kvm-arch.vpit-pit_state;
   s64 interval;
  
 - if (!irqchip_in_kernel(kvm) || ps-flags  KVM_PIT_FLAGS_HPET_LEGACY)
 + if (ps-flags  KVM_PIT_FLAGS_HPET_LEGACY)
   return;
  
   interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
 -- 
 1.8.3.1
 
 
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v6 1/4] KVM: x86: Split the APIC from the rest of IRQCHIP.

2015-07-28 Thread Steve Rutherford
First patch in a series which enables the relocation of the
PIC/IOAPIC to userspace.

Adds capability KVM_CAP_SPLIT_IRQCHIP;

KVM_CAP_SPLIT_IRQCHIP enables the construction of LAPICs without the
rest of the irqchip.

Compile tested for x86.

Signed-off-by: Steve Rutherford srutherf...@google.com
Suggested-by: Andrew Honig aho...@google.com
---
 Documentation/virtual/kvm/api.txt | 15 +++
 arch/x86/include/asm/kvm_host.h   |  2 ++
 arch/x86/kvm/i8254.c  |  5 -
 arch/x86/kvm/ioapic.h |  8 
 arch/x86/kvm/irq.h|  6 ++
 arch/x86/kvm/irq_comm.c   |  9 -
 arch/x86/kvm/lapic.c  |  9 ++---
 arch/x86/kvm/vmx.c|  4 ++--
 arch/x86/kvm/x86.c| 22 --
 include/linux/kvm_host.h  |  1 +
 include/uapi/linux/kvm.h  |  1 +
 11 files changed, 73 insertions(+), 9 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index a4ebcb7..b655024 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3620,6 +3620,21 @@ struct {
 
 KVM handlers should exit to userspace with rc = -EREMOTE.
 
+7.5 KVM_SPLIT_IRQCHIP
+
+Architectures: x86
+Parameters: None
+Returns: 0 on success, -1 on error
+
+Create a local apic for each processor in the kernel. With this capability
+enabled, the userspace VMM is expected to emulate the IOAPIC and PIC.
+
+This supersedes KVM_CREATE_IRQCHIP, creating only local APICs, but no in kernel
+IOAPIC or PIC. This also enables in kernel routing of interrupt requests.
+
+Fails if VCPU has already been created, or if the irqchip is already in the
+kernel (i.e. KVM_CREATE_IRQCHIP has already been called).
+
 
 8. Other capabilities.
 --
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fa32b53..18a110b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -669,6 +669,8 @@ struct kvm_arch {
bool boot_vcpu_runs_old_kvmclock;
 
u64 disabled_quirks;
+
+   bool irqchip_split;
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index f90952f..5708850 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -35,6 +35,7 @@
 #include linux/kvm_host.h
 #include linux/slab.h
 
+#include ioapic.h
 #include irq.h
 #include i8254.h
 #include x86.h
@@ -333,7 +334,9 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int 
is_period)
struct kvm_kpit_state *ps = kvm-arch.vpit-pit_state;
s64 interval;
 
-   if (!irqchip_in_kernel(kvm) || ps-flags  KVM_PIT_FLAGS_HPET_LEGACY)
+   if (!irqchip_in_kernel(kvm) ||
+   !ioapic_in_kernel(kvm) ||
+   ps-flags  KVM_PIT_FLAGS_HPET_LEGACY)
return;
 
interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index ca0b0b4..3ce56f8 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -98,6 +98,14 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm 
*kvm)
return kvm-arch.vioapic;
 }
 
+static inline int ioapic_in_kernel(struct kvm *kvm)
+{
+   int ret;
+
+   ret = (ioapic_irqchip(kvm) != NULL);
+   return ret;
+}
+
 static inline bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector)
 {
struct kvm_ioapic *ioapic = kvm-arch.vioapic;
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index ad68c73..2f13dd5 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -83,11 +83,17 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
return kvm-arch.vpic;
 }
 
+static inline int irqchip_split(struct kvm *kvm)
+{
+   return kvm-arch.irqchip_split;
+}
+
 static inline int irqchip_in_kernel(struct kvm *kvm)
 {
int ret;
 
ret = (pic_irqchip(kvm) != NULL);
+   ret |= irqchip_split(kvm);
smp_rmb();
return ret;
 }
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 9efff9e..67f6b62 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -208,7 +208,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int 
irq_source_id)
goto unlock;
}
clear_bit(irq_source_id, kvm-arch.irq_sources_bitmap);
-   if (!irqchip_in_kernel(kvm))
+   if (!ioapic_in_kernel(kvm))
goto unlock;
 
kvm_ioapic_clear_all(kvm-arch.vioapic, irq_source_id);
@@ -328,3 +328,10 @@ int kvm_setup_default_irq_routing(struct kvm *kvm)
return kvm_set_irq_routing(kvm, default_routing,
   ARRAY_SIZE(default_routing), 0);
 }
+
+static const struct kvm_irq_routing_entry empty_routing[] = {};
+
+int kvm_setup_empty_irq_routing(struct kvm *kvm)
+{
+   return kvm_set_irq_routing(kvm, empty_routing, 0, 0);
+}
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 2a5ca97..536b79e 100644
--- a/arch/x86

[PATCH v6 3/4] KVM: x86: Add EOI exit bitmap inference

2015-07-28 Thread Steve Rutherford
In order to support a userspace IOAPIC interacting with an in kernel
APIC, the EOI exit bitmaps need to be configurable.

If the IOAPIC is in userspace (i.e. the irqchip has been split), the
EOI exit bitmaps will be set whenever the GSI Routes are configured.
In particular, the low MSI routes are reservable for userspace
IOAPICs. For these MSI routes, the EOI Exit bit corresponding to the
destination vector of the route will be set for the destination VCPU.

The intention is for the userspace IOAPICs to use the reservable MSI
routes to inject interrupts into the guest.

This is a slight abuse of the notion of an MSI Route, given that MSIs
classically bypass the IOAPIC. It might be worthwhile to add an
additional route type to improve clarity.

Compile tested for Intel x86.

Signed-off-by: Steve Rutherford srutherf...@google.com
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/ioapic.h   |  2 ++
 arch/x86/kvm/irq_comm.c | 42 +
 arch/x86/kvm/lapic.c|  3 +--
 arch/x86/kvm/x86.c  | 29 ++--
 include/linux/kvm_host.h| 20 
 virt/kvm/irqchip.c  | 12 ++--
 7 files changed, 87 insertions(+), 22 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f1e0103..ebe7f07 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -674,6 +674,7 @@ struct kvm_arch {
u64 disabled_quirks;
 
bool irqchip_split;
+   u8 nr_reserved_ioapic_pins;
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index 3ce56f8..c93fe23 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -9,6 +9,7 @@ struct kvm;
 struct kvm_vcpu;
 
 #define IOAPIC_NUM_PINS  KVM_IOAPIC_NUM_PINS
+#define MAX_NR_RESERVED_IOAPIC_PINS 48
 #define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */
 #define IOAPIC_EDGE_TRIG  0
 #define IOAPIC_LEVEL_TRIG 1
@@ -131,4 +132,5 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state 
*state);
 void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
u32 *tmr);
 
+void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
 #endif
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 67f6b62..da4827f 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -335,3 +335,45 @@ int kvm_setup_empty_irq_routing(struct kvm *kvm)
 {
return kvm_set_irq_routing(kvm, empty_routing, 0, 0);
 }
+
+void kvm_arch_irq_routing_update(struct kvm *kvm)
+{
+   if (ioapic_in_kernel(kvm) || !irqchip_in_kernel(kvm))
+   return;
+   kvm_make_scan_ioapic_request(kvm);
+}
+
+void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
+{
+   struct kvm *kvm = vcpu-kvm;
+   struct kvm_kernel_irq_routing_entry *entry;
+   struct kvm_irq_routing_table *table;
+   u32 i, nr_ioapic_pins;
+   int idx;
+
+   /* kvm-irq_routing must be read after clearing
+* KVM_SCAN_IOAPIC. */
+   smp_mb();
+   idx = srcu_read_lock(kvm-irq_srcu);
+   table = kvm-irq_routing;
+   nr_ioapic_pins = min_t(u32, table-nr_rt_entries,
+  kvm-arch.nr_reserved_ioapic_pins);
+   for (i = 0; i  nr_ioapic_pins; ++i) {
+   hlist_for_each_entry(entry, table-map[i], link) {
+   u32 dest_id, dest_mode;
+
+   if (entry-type != KVM_IRQ_ROUTING_MSI)
+   continue;
+   dest_id = (entry-msi.address_lo  12)  0xff;
+   dest_mode = (entry-msi.address_lo  2)  0x1;
+   if (kvm_apic_match_dest(vcpu, NULL, 0, dest_id,
+   dest_mode)) {
+   u32 vector = entry-msi.data  0xff;
+
+   __set_bit(vector,
+ (unsigned long *) eoi_exit_bitmap);
+   }
+   }
+   }
+   srcu_read_unlock(kvm-irq_srcu, idx);
+}
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 37e220d..4dbf6c1 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -209,8 +209,7 @@ out:
if (old)
kfree_rcu(old, rcu);
 
-   if (!irqchip_split(kvm))
-   kvm_vcpu_request_scan_ioapic(kvm);
+   kvm_make_scan_ioapic_request(kvm);
 }
 
 static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a612266..f32f7cb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3575,12 +3575,17 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
if (irqchip_in_kernel(kvm))
goto split_irqchip_unlock;
r = -EINVAL;
-   if (atomic_read(kvm-online_vcpus

[PATCH v6 2/4] KVM: x86: Add KVM exit for IOAPIC EOIs

2015-07-28 Thread Steve Rutherford
Adds KVM_EXIT_IOAPIC_EOI which allows the kernel to EOI
level-triggered IOAPIC interrupts.

Uses a per VCPU exit bitmap to decide whether or not the IOAPIC needs
to be informed (which is identical to the EOI_EXIT_BITMAP field used
by modern x86 processors, but can also be used to elide kvm IOAPIC EOI
exits on older processors).

[Note: A prototype using ResampleFDs found that decoupling the EOI
from the VCPU's thread made it possible for the VCPU to not see a
recent EOI after reentering the guest. This does not match real
hardware.]

Compile tested for Intel x86.

Signed-off-by: Steve Rutherford srutherf...@google.com
---
 Documentation/virtual/kvm/api.txt | 12 
 arch/x86/include/asm/kvm_host.h   |  3 +++
 arch/x86/kvm/lapic.c  |  9 +
 arch/x86/kvm/x86.c| 11 +++
 include/linux/kvm_host.h  |  2 +-
 include/uapi/linux/kvm.h  |  5 +
 6 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index b655024..78d0ae8 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3302,6 +3302,18 @@ Valid values for 'type' are:
to ignore the request, or to gather VM memory core dump and/or
reset/shutdown of the VM.
 
+   /* KVM_EXIT_IOAPIC_EOI */
+   struct {
+   __u8 vector;
+   } eoi;
+
+Indicates that the VCPU's in-kernel local APIC received an EOI for a
+level-triggered IOAPIC interrupt.  This exit only triggers when the
+IOAPIC is implemented in userspace (i.e. KVM_CAP_SPLIT_IRQCHIP is enabled);
+the userspace IOAPIC should process the EOI and retrigger the interrupt if
+it is still asserted.  Vector is the LAPIC interrupt vector for which the
+EOI was received.
+
/* Fix the size of the union. */
char padding[256];
};
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 18a110b..f1e0103 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -560,6 +560,9 @@ struct kvm_vcpu_arch {
struct {
bool pv_unhalted;
} pv;
+
+   u64 eoi_exit_bitmaps[4];
+   int pending_ioapic_eoi;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 536b79e..37e220d 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -871,6 +871,15 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct 
kvm_vcpu *vcpu2)
 
 static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
 {
+   if (irqchip_split(apic-vcpu-kvm)) {
+   if (test_bit(vector,
+(void *) apic-vcpu-arch.eoi_exit_bitmaps)) {
+   apic-vcpu-arch.pending_ioapic_eoi = vector;
+   kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic-vcpu);
+   }
+   return;
+   }
+
if (kvm_ioapic_handles_vector(apic-vcpu-kvm, vector)) {
int trigger_mode;
if (apic_test_vector(vector, apic-regs + APIC_TMR))
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 985c99f..a612266 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6277,6 +6277,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_pmu_handle_event(vcpu);
if (kvm_check_request(KVM_REQ_PMI, vcpu))
kvm_pmu_deliver_pmi(vcpu);
+   if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) {
+   BUG_ON(vcpu-arch.pending_ioapic_eoi  255);
+   if (test_bit(vcpu-arch.pending_ioapic_eoi,
+(void *) vcpu-arch.eoi_exit_bitmaps)) {
+   vcpu-run-exit_reason = KVM_EXIT_IOAPIC_EOI;
+   vcpu-run-eoi.vector =
+   vcpu-arch.pending_ioapic_eoi;
+   r = 0;
+   goto out;
+   }
+   }
if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
vcpu_scan_ioapic(vcpu);
if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f7eab09..8e12d67 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -140,6 +140,7 @@ static inline bool is_error_page(struct page *page)
 #define KVM_REQ_APIC_PAGE_RELOAD  25
 #define KVM_REQ_SMI   26
 #define KVM_REQ_HV_CRASH  27
+#define KVM_REQ_IOAPIC_EOI_EXIT   28
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID0
 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID   1
@@ -1157,4 +1158,3 @@ static inline void kvm_vcpu_set_dy_eligible(struct 
kvm_vcpu *vcpu, bool val)
 }
 #endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
 #endif
-
diff --git a/include/uapi/linux

[PATCH v6 4/4] KVM: x86: Add support for local interrupt requests from userspace

2015-07-28 Thread Steve Rutherford
In order to enable userspace PIC support, the userspace PIC needs to
be able to inject local interrupts even when the APICs are in the
kernel.

KVM_INTERRUPT now supports sending local interrupts to an APIC when
APICs are in the kernel.

The ready_for_interrupt_injection flag is now only set when the CPU/APIC
will immediately accept and inject an interrupt (i.e. the APIC has not
masked the PIC).

When the PIC wishes to initiate an INTA cycle with, say, CPU0, it
kicks CPU0 out of the guest, and rendezvouses with CPU0 once it arrives
in userspace.

When the CPU/APIC unmasks the PIC, a KVM_EXIT_IRQ_WINDOW_OPEN is
triggered, so that userspace has a chance to inject a PIC interrupt
if it had been pending.

Overall, this design can lead to a small number of spurious userspace
rendezvous. In particular, these occur whenever the PIC transitions from
low to high while it is masked, and whenever the PIC becomes unmasked
while it is low.

Note: this does not buffer more than one local interrupt in the
kernel, so the VMM needs to enter the guest in order to complete
interrupt injection before injecting an additional interrupt.

Compiles for x86.

Can pass the KVM Unit Tests.

Signed-off-by: Steve Rutherford srutherf...@google.com
---
 Documentation/virtual/kvm/api.txt | 14 ++
 arch/x86/include/asm/kvm_host.h   |  1 +
 arch/x86/kvm/irq.c| 38 +-
 arch/x86/kvm/irq.h|  8 
 arch/x86/kvm/x86.c| 35 +++
 5 files changed, 75 insertions(+), 21 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index 78d0ae8..4de4286 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -401,10 +401,9 @@ Capability: basic
 Architectures: x86, ppc, mips
 Type: vcpu ioctl
 Parameters: struct kvm_interrupt (in)
-Returns: 0 on success, -1 on error
+Returns: 0 on success, negative on failure.
 
-Queues a hardware interrupt vector to be injected.  This is only
-useful if in-kernel local APIC or equivalent is not used.
+Queues a hardware interrupt vector to be injected.
 
 /* for KVM_INTERRUPT */
 struct kvm_interrupt {
@@ -414,7 +413,14 @@ struct kvm_interrupt {
 
 X86:
 
-Note 'irq' is an interrupt vector, not an interrupt pin or line.
+Returns: 0 on success,
+-EEXIST if an interrupt is already enqueued
+-EINVAL if the irq number is invalid
+-ENXIO if the PIC is in the kernel
+-EFAULT if the pointer is invalid
+
+Note 'irq' is an interrupt vector, not an interrupt pin or line. This
+ioctl is useful if the in-kernel PIC is not used.
 
 PPC:
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ebe7f07..b6508a3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -563,6 +563,7 @@ struct kvm_vcpu_arch {
 
u64 eoi_exit_bitmaps[4];
int pending_ioapic_eoi;
+   int pending_external_vector;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index a1ec6a50..5fa0e6f 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -38,14 +38,27 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
 
 /*
+ * check if there is a pending userspace external interrupt
+ */
+static int pending_userspace_extint(struct kvm_vcpu *v)
+{
+   return v-arch.pending_external_vector != -1;
+}
+
+/*
  * check if there is pending interrupt from
  * non-APIC source without intack.
  */
 static int kvm_cpu_has_extint(struct kvm_vcpu *v)
 {
-   if (kvm_apic_accept_pic_intr(v))
-   return pic_irqchip(v-kvm)-output; /* PIC */
-   else
+   u8 accept = kvm_apic_accept_pic_intr(v);
+
+   if (accept) {
+   if (irqchip_split(v-kvm))
+   return pending_userspace_extint(v);
+   else
+   return pic_irqchip(v-kvm)-output;
+   } else
return 0;
 }
 
@@ -57,7 +70,7 @@ static int kvm_cpu_has_extint(struct kvm_vcpu *v)
  */
 int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
 {
-   if (!irqchip_in_kernel(v-kvm))
+   if (!pic_in_kernel(v-kvm))
return v-arch.interrupt.pending;
 
if (kvm_cpu_has_extint(v))
@@ -75,7 +88,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
  */
 int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
 {
-   if (!irqchip_in_kernel(v-kvm))
+   if (!pic_in_kernel(v-kvm))
return v-arch.interrupt.pending;
 
if (kvm_cpu_has_extint(v))
@@ -91,9 +104,16 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
  */
 static int kvm_cpu_get_extint(struct kvm_vcpu *v)
 {
-   if (kvm_cpu_has_extint(v))
-   return kvm_pic_read_irq(v-kvm); /* PIC */
-   return -1;
+   if (kvm_cpu_has_extint(v)) {
+   if (irqchip_split(v-kvm)) {
+   int vector = v-arch.pending_external_vector

Re: [PATCH v5 4/4] KVM: x86: Add support for local interrupt requests from userspace

2015-07-28 Thread Steve Rutherford
On Wed, Jul 29, 2015 at 12:05:37AM +0200, Paolo Bonzini wrote:
> Ok, I understand it now.  However, you're still not causing an exit
> when LVT0 changes, are you?  post_kvm_run_save is not run until the
> next exit to userspace, which could be a long time later.

Yes! This is definitely right. This may wait as long as an entire entry/exit
before actually exiting to userspace. Moving into dm_request_for_irq_injection
is definitely the way to go.
 
> So, I think that you do not need KVM_REQ_PIC_UNMASK_EXIT.  Instead,
> you can modify dm_request_for_irq_injection to handle the split-irqchip
> case, like this:
>
>       if (!vcpu->run->request_interrupt_window || pic_in_kernel(vcpu->kvm))
>               return false;
>
>       if (kvm_cpu_has_interrupt(vcpu))
>               return false;
>
>       return (irqchip_split(vcpu->kvm)
>               ? kvm_apic_accept_pic_intr(vcpu)
>               : kvm_arch_interrupt_allowed(vcpu));
>
> This will cause KVM_RUN to return -EINTR, which QEMU happens to handle
> the same way as KVM_EXIT_IRQ_WINDOW_OPEN.
I definitely prefer the explicit exit reason.
It's also a bit easier to make work with our VMM ;)

> Feel free to post v6 of this patch only.  Everything else is mostly
> okay; there are some leftovers here and there (lapic_in_kernel,
> GET_VECTOR_FROM_USERSPACE) but I can fix that.
I'll give it another once over to remove the dead code. Sorry about
leaving that junk in.

> How is the integration with QEMU going?  With this latest iteration
> it should be relatively easy.
A new team member is sinking his teeth into it, as a starter project.
He'll likely have a prototype of it working soon. 

Steve
 
 Paolo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 4/4] KVM: x86: Add support for local interrupt requests from userspace

2015-07-28 Thread Steve Rutherford
On Tue, Jul 28, 2015 at 05:58:38PM +0200, Paolo Bonzini wrote:
 
 
 On 28/07/2015 01:17, Steve Rutherford wrote:
  return kvm-arch.vpic;
   }
   
  +static inline int pic_in_kernel(struct kvm *kvm)
  +{
  +   int ret;
  +
  +   ret = (pic_irqchip(kvm) != NULL);
  +   smp_rmb();
 
 What does this memory barrier pair with?  I don't think it's necessary.

To be honest, it's probably not necessary. I couldn't find why
irqchip_in_kernel (which this function is more or less a copy of)
needed its memory barrier, so I cargo-culted this one in.

 
  +   return ret;
  +}
  +
   static inline int irqchip_split(struct kvm *kvm)
   {
  return kvm-arch.irqchip_split;
 
 
  @@ -5819,13 +5828,24 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
  kvm_run-flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0;
  kvm_run-cr8 = kvm_get_cr8(vcpu);
  kvm_run-apic_base = kvm_get_apic_base(vcpu);
  -   if (irqchip_in_kernel(vcpu-kvm))
  +   if (irqchip_in_kernel(vcpu-kvm)  pic_in_kernel(vcpu-kvm))
  kvm_run-ready_for_interrupt_injection = 1;
  -   else
  +   else if (irqchip_in_kernel(vcpu-kvm)) {
  +   int ready_for_interrupt_injection =
  +   kvm_apic_accept_pic_intr(vcpu);
  +
  +   if (!kvm_run-ready_for_interrupt_injection 
  +   ready_for_interrupt_injection)
  +   kvm_make_request(KVM_REQ_PIC_UNMASK_EXIT, vcpu);
  +
  +   kvm_run-ready_for_interrupt_injection =
  +   ready_for_interrupt_injection;
  +   } else {
  kvm_run-ready_for_interrupt_injection =
  kvm_arch_interrupt_allowed(vcpu) 
  !kvm_cpu_has_interrupt(vcpu) 
  !kvm_event_needs_reinjection(vcpu);
  +   }
   }
   
   static void update_cr8_intercept(struct kvm_vcpu *vcpu)
 
 Why is this necessary?  Could it just set
 kvm_run-ready_for_interrupt_injection as in the pic_in_kernel case?

The goal is to couple the interrupt ack cycle as closely as possible
with the injection of the local interrupt (which occur more or less
atomically on real hardware). The idea is to only ever attempt to
inject local interrupts when the CPU/APIC is ready to immediately
accept. 

If the CPU is ignoring the PIC, the interrupt acknowledge cycle should
not be performed, even if the PIC is high. This patch uses the
ready_for_interrupt_injection flag to let userspace know whether or not the
CPU is paying attention to the PIC at the moment.

When the PIC is high and the CPU transitions from ignoring the PIC to
paying attention to the PIC, it should (per real hardware)
immediately trigger an interrupt acknowledge cycle (which requires
bouncing up to userspace).

Steve
 
 Paolo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 1/4] KVM: x86: Split the APIC from the rest of IRQCHIP.

2015-07-27 Thread Steve Rutherford
First patch in a series which enables the relocation of the
PIC/IOAPIC to userspace.

Adds the KVM_CAP_SPLIT_IRQCHIP capability.

KVM_CAP_SPLIT_IRQCHIP enables the construction of LAPICs without the
rest of the irqchip.

Compile tested for x86.

Signed-off-by: Steve Rutherford srutherf...@google.com
Suggested-by: Andrew Honig aho...@google.com
---
 Documentation/virtual/kvm/api.txt | 15 +++
 arch/powerpc/kvm/irq.h|  1 -
 arch/s390/kvm/irq.h   |  1 -
 arch/x86/include/asm/kvm_host.h   |  2 ++
 arch/x86/kvm/i8254.c  |  5 -
 arch/x86/kvm/ioapic.h |  9 +
 arch/x86/kvm/irq.h|  6 ++
 arch/x86/kvm/irq_comm.c   |  9 -
 arch/x86/kvm/lapic.c  |  9 ++---
 arch/x86/kvm/vmx.c|  4 ++--
 arch/x86/kvm/x86.c| 23 +--
 include/kvm/arm_vgic.h|  1 +
 include/linux/kvm_host.h  |  1 +
 include/uapi/linux/kvm.h  |  1 +
 14 files changed, 76 insertions(+), 11 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index a4ebcb7..b655024 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3620,6 +3620,21 @@ struct {
 
 KVM handlers should exit to userspace with rc = -EREMOTE.
 
+7.5 KVM_SPLIT_IRQCHIP
+
+Architectures: x86
+Parameters: None
+Returns: 0 on success, -1 on error
+
+Create a local apic for each processor in the kernel. With this capability
+enabled, the userspace VMM is expected to emulate the IOAPIC and PIC.
+
+This supersedes KVM_CREATE_IRQCHIP, creating only local APICs, but no in kernel
+IOAPIC or PIC. This also enables in kernel routing of interrupt requests.
+
+Fails if VCPU has already been created, or if the irqchip is already in the
+kernel (i.e. KVM_CREATE_IRQCHIP has already been called).
+
 
 8. Other capabilities.
 --
diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h
index 5a9a10b..772fa8c 100644
--- a/arch/powerpc/kvm/irq.h
+++ b/arch/powerpc/kvm/irq.h
@@ -16,5 +16,4 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
smp_rmb();
return ret;
 }
-
 #endif
diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h
index d98e415..9a21a86 100644
--- a/arch/s390/kvm/irq.h
+++ b/arch/s390/kvm/irq.h
@@ -18,5 +18,4 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
 {
return 1;
 }
-
 #endif
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fa32b53..18a110b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -669,6 +669,8 @@ struct kvm_arch {
bool boot_vcpu_runs_old_kvmclock;
 
u64 disabled_quirks;
+
+   bool irqchip_split;
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index f90952f..5708850 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -35,6 +35,7 @@
 #include linux/kvm_host.h
 #include linux/slab.h
 
+#include ioapic.h
 #include irq.h
 #include i8254.h
 #include x86.h
@@ -333,7 +334,9 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int 
is_period)
struct kvm_kpit_state *ps = kvm-arch.vpit-pit_state;
s64 interval;
 
-   if (!irqchip_in_kernel(kvm) || ps-flags  KVM_PIT_FLAGS_HPET_LEGACY)
+   if (!irqchip_in_kernel(kvm) ||
+   !ioapic_in_kernel(kvm) ||
+   ps-flags  KVM_PIT_FLAGS_HPET_LEGACY)
return;
 
interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index ca0b0b4..d8cc54b 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -98,6 +98,15 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm 
*kvm)
return kvm-arch.vioapic;
 }
 
+static inline int ioapic_in_kernel(struct kvm *kvm)
+{
+   int ret;
+
+   ret = (ioapic_irqchip(kvm) != NULL);
+   smp_rmb();
+   return ret;
+}
+
 static inline bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector)
 {
struct kvm_ioapic *ioapic = kvm-arch.vioapic;
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index ad68c73..2f13dd5 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -83,11 +83,17 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
return kvm-arch.vpic;
 }
 
+static inline int irqchip_split(struct kvm *kvm)
+{
+   return kvm-arch.irqchip_split;
+}
+
 static inline int irqchip_in_kernel(struct kvm *kvm)
 {
int ret;
 
ret = (pic_irqchip(kvm) != NULL);
+   ret |= irqchip_split(kvm);
smp_rmb();
return ret;
 }
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 9efff9e..67f6b62 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -208,7 +208,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int 
irq_source_id)
goto unlock;
}
clear_bit(irq_source_id, kvm

[PATCH v5 4/4] KVM: x86: Add support for local interrupt requests from userspace

2015-07-27 Thread Steve Rutherford
In order to enable userspace PIC support, the userspace PIC needs to
be able to inject local interrupts even when the APICs are in the
kernel.

KVM_INTERRUPT now supports sending local interrupts to an APIC when
APICs are in the kernel.

The ready_for_interrupt_injection flag is now only set when the CPU/APIC
will immediately accept and inject an interrupt (i.e. the APIC has not
masked the PIC).

When the PIC wishes to initiate an INTA cycle with, say, CPU0, it
kicks CPU0 out of the guest, and rendezvouses with CPU0 once it arrives
in userspace.

When the CPU/APIC unmasks the PIC, a KVM_EXIT_IRQ_WINDOW_OPEN is
triggered, so that userspace has a chance to inject a PIC interrupt
if it had been pending.

Overall, this design can lead to a small number of spurious userspace
rendezvous. In particular, these occur whenever the PIC transitions from
low to high while it is masked, and whenever the PIC becomes unmasked
while it is low.

Note: this does not buffer more than one local interrupt in the
kernel, so the VMM needs to enter the guest in order to complete
interrupt injection before injecting an additional interrupt.

Compiles for x86.

Can pass the KVM Unit Tests.

Signed-off-by: Steve Rutherford srutherf...@google.com
---
 Documentation/virtual/kvm/api.txt | 14 ++
 arch/x86/include/asm/kvm_host.h   |  1 +
 arch/x86/kvm/irq.c| 38 +-
 arch/x86/kvm/irq.h|  9 +
 arch/x86/kvm/lapic.h  |  2 ++
 arch/x86/kvm/x86.c| 34 +++---
 include/linux/kvm_host.h  |  1 +
 7 files changed, 83 insertions(+), 16 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index 39e4c02..8f754d1 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -401,10 +401,9 @@ Capability: basic
 Architectures: x86, ppc, mips
 Type: vcpu ioctl
 Parameters: struct kvm_interrupt (in)
-Returns: 0 on success, -1 on error
+Returns: 0 on success, negative on failure.
 
-Queues a hardware interrupt vector to be injected.  This is only
-useful if in-kernel local APIC or equivalent is not used.
+Queues a hardware interrupt vector to be injected.
 
 /* for KVM_INTERRUPT */
 struct kvm_interrupt {
@@ -414,7 +413,14 @@ struct kvm_interrupt {
 
 X86:
 
-Note 'irq' is an interrupt vector, not an interrupt pin or line.
+Returns: 0 on success,
+-EEXIST if an interrupt is already enqueued
+-EINVAL if the irq number is invalid
+-ENXIO if the PIC is in the kernel
+-EFAULT if the pointer is invalid
+
+Note 'irq' is an interrupt vector, not an interrupt pin or line. This
+ioctl is useful if the in-kernel PIC is not used.
 
 PPC:
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ebe7f07..b6508a3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -563,6 +563,7 @@ struct kvm_vcpu_arch {
 
u64 eoi_exit_bitmaps[4];
int pending_ioapic_eoi;
+   int pending_external_vector;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index a1ec6a50..5fa0e6f 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -38,14 +38,27 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
 
 /*
+ * check if there is a pending userspace external interrupt
+ */
+static int pending_userspace_extint(struct kvm_vcpu *v)
+{
+   return v-arch.pending_external_vector != -1;
+}
+
+/*
  * check if there is pending interrupt from
  * non-APIC source without intack.
  */
 static int kvm_cpu_has_extint(struct kvm_vcpu *v)
 {
-   if (kvm_apic_accept_pic_intr(v))
-   return pic_irqchip(v-kvm)-output; /* PIC */
-   else
+   u8 accept = kvm_apic_accept_pic_intr(v);
+
+   if (accept) {
+   if (irqchip_split(v-kvm))
+   return pending_userspace_extint(v);
+   else
+   return pic_irqchip(v-kvm)-output;
+   } else
return 0;
 }
 
@@ -57,7 +70,7 @@ static int kvm_cpu_has_extint(struct kvm_vcpu *v)
  */
 int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
 {
-   if (!irqchip_in_kernel(v-kvm))
+   if (!pic_in_kernel(v-kvm))
return v-arch.interrupt.pending;
 
if (kvm_cpu_has_extint(v))
@@ -75,7 +88,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
  */
 int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
 {
-   if (!irqchip_in_kernel(v-kvm))
+   if (!pic_in_kernel(v-kvm))
return v-arch.interrupt.pending;
 
if (kvm_cpu_has_extint(v))
@@ -91,9 +104,16 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
  */
 static int kvm_cpu_get_extint(struct kvm_vcpu *v)
 {
-   if (kvm_cpu_has_extint(v))
-   return kvm_pic_read_irq(v-kvm); /* PIC */
-   return -1;
+   if (kvm_cpu_has_extint(v

[PATCH v5 2/4] KVM: x86: Add KVM exit for IOAPIC EOIs

2015-07-27 Thread Steve Rutherford
Adds KVM_EXIT_IOAPIC_EOI which allows the kernel to EOI
level-triggered IOAPIC interrupts.

Uses a per VCPU exit bitmap to decide whether or not the IOAPIC needs
to be informed (which is identical to the EOI_EXIT_BITMAP field used
by modern x86 processors, but can also be used to elide kvm IOAPIC EOI
exits on older processors).

[Note: A prototype using ResampleFDs found that decoupling the EOI
from the VCPU's thread made it possible for the VCPU to not see a
recent EOI after reentering the guest. This does not match real
hardware.]

Compile tested for Intel x86.

Signed-off-by: Steve Rutherford srutherf...@google.com
---
 Documentation/virtual/kvm/api.txt | 13 +
 arch/x86/include/asm/kvm_host.h   |  3 +++
 arch/x86/kvm/lapic.c  |  9 +
 arch/x86/kvm/x86.c| 11 +++
 include/linux/kvm_host.h  |  2 +-
 include/uapi/linux/kvm.h  |  5 +
 6 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index b655024..6a13dff 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3302,6 +3302,18 @@ Valid values for 'type' are:
to ignore the request, or to gather VM memory core dump and/or
reset/shutdown of the VM.
 
+   /* KVM_EXIT_IOAPIC_EOI */
+struct {
+  __u8 vector;
+} eoi;
+
+Indicates that the VCPU's in-kernel local APIC received an EOI for a
+level-triggered IOAPIC interrupt.  This exit only triggers when the
+IOAPIC is implemented in userspace (i.e. KVM_CAP_SPLIT_IRQCHIP is enabled);
+the userspace IOAPIC should process the EOI and retrigger the interrupt if
+it is still asserted.  Vector is the LAPIC interrupt vector for which the
+EOI was received.
+
/* Fix the size of the union. */
char padding[256];
};
@@ -3315,6 +3327,7 @@ Valid values for 'type' are:
 */
__u64 kvm_valid_regs;
__u64 kvm_dirty_regs;
+
union {
struct kvm_sync_regs regs;
char padding[1024];
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 18a110b..f1e0103 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -560,6 +560,9 @@ struct kvm_vcpu_arch {
struct {
bool pv_unhalted;
} pv;
+
+   u64 eoi_exit_bitmaps[4];
+   int pending_ioapic_eoi;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 536b79e..37e220d 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -871,6 +871,15 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct 
kvm_vcpu *vcpu2)
 
 static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
 {
+   if (irqchip_split(apic-vcpu-kvm)) {
+   if (test_bit(vector,
+(void *) apic-vcpu-arch.eoi_exit_bitmaps)) {
+   apic-vcpu-arch.pending_ioapic_eoi = vector;
+   kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic-vcpu);
+   }
+   return;
+   }
+
if (kvm_ioapic_handles_vector(apic-vcpu-kvm, vector)) {
int trigger_mode;
if (apic_test_vector(vector, apic-regs + APIC_TMR))
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6d4b4dc..03ba33a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6278,6 +6278,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_pmu_handle_event(vcpu);
if (kvm_check_request(KVM_REQ_PMI, vcpu))
kvm_pmu_deliver_pmi(vcpu);
+   if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) {
+   BUG_ON(vcpu-arch.pending_ioapic_eoi  255);
+   if (test_bit(vcpu-arch.pending_ioapic_eoi,
+(void *) vcpu-arch.eoi_exit_bitmaps)) {
+   vcpu-run-exit_reason = KVM_EXIT_IOAPIC_EOI;
+   vcpu-run-eoi.vector =
+   vcpu-arch.pending_ioapic_eoi;
+   r = 0;
+   goto out;
+   }
+   }
if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
vcpu_scan_ioapic(vcpu);
if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f7eab09..8e12d67 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -140,6 +140,7 @@ static inline bool is_error_page(struct page *page)
 #define KVM_REQ_APIC_PAGE_RELOAD  25
 #define KVM_REQ_SMI   26
 #define KVM_REQ_HV_CRASH  27
+#define KVM_REQ_IOAPIC_EOI_EXIT   28
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID0
 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID   1

[PATCH v5 3/4] KVM: x86: Add EOI exit bitmap inference

2015-07-27 Thread Steve Rutherford
In order to support a userspace IOAPIC interacting with an in kernel
APIC, the EOI exit bitmaps need to be configurable.

If the IOAPIC is in userspace (i.e. the irqchip has been split), the
EOI exit bitmaps will be set whenever the GSI Routes are configured.
In particular, the low MSI routes are reservable for userspace
IOAPICs. For these MSI routes, the EOI Exit bit corresponding to the
destination vector of the route will be set for the destination VCPU.

The intention is for the userspace IOAPICs to use the reservable MSI
routes to inject interrupts into the guest.

This is a slight abuse of the notion of an MSI Route, given that MSIs
classically bypass the IOAPIC. It might be worthwhile to add an
additional route type to improve clarity.

Compile tested for Intel x86.

Signed-off-by: Steve Rutherford srutherf...@google.com
---
 Documentation/virtual/kvm/api.txt |  8 
 arch/x86/include/asm/kvm_host.h   |  1 +
 arch/x86/kvm/ioapic.h |  2 ++
 arch/x86/kvm/irq_comm.c   | 42 +++
 arch/x86/kvm/lapic.c  |  3 +--
 arch/x86/kvm/x86.c| 29 +--
 include/linux/kvm_host.h  | 20 +++
 virt/kvm/irqchip.c| 12 ++-
 8 files changed, 91 insertions(+), 26 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index 6a13dff..39e4c02 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3302,10 +3302,10 @@ Valid values for 'type' are:
to ignore the request, or to gather VM memory core dump and/or
reset/shutdown of the VM.
 
-   /* KVM_EXIT_IOAPIC_EOI */
-struct {
-  __u8 vector;
-} eoi;
+   /* KVM_EXIT_IOAPIC_EOI */
+   struct {
+   __u8 vector;
+   } eoi;
 
 Indicates that the VCPU's in-kernel local APIC received an EOI for a
 level-triggered IOAPIC interrupt.  This exit only triggers when the
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f1e0103..ebe7f07 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -674,6 +674,7 @@ struct kvm_arch {
u64 disabled_quirks;
 
bool irqchip_split;
+   u8 nr_reserved_ioapic_pins;
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index d8cc54b..f6ce112 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -9,6 +9,7 @@ struct kvm;
 struct kvm_vcpu;
 
 #define IOAPIC_NUM_PINS  KVM_IOAPIC_NUM_PINS
+#define MAX_NR_RESERVED_IOAPIC_PINS 48
 #define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */
 #define IOAPIC_EDGE_TRIG  0
 #define IOAPIC_LEVEL_TRIG 1
@@ -132,4 +133,5 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state 
*state);
 void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
u32 *tmr);
 
+void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
 #endif
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 67f6b62..da4827f 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -335,3 +335,45 @@ int kvm_setup_empty_irq_routing(struct kvm *kvm)
 {
return kvm_set_irq_routing(kvm, empty_routing, 0, 0);
 }
+
+void kvm_arch_irq_routing_update(struct kvm *kvm)
+{
+   if (ioapic_in_kernel(kvm) || !irqchip_in_kernel(kvm))
+   return;
+   kvm_make_scan_ioapic_request(kvm);
+}
+
+void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
+{
+   struct kvm *kvm = vcpu->kvm;
+   struct kvm_kernel_irq_routing_entry *entry;
+   struct kvm_irq_routing_table *table;
+   u32 i, nr_ioapic_pins;
+   int idx;
+
+   /* kvm->irq_routing must be read after clearing
+* KVM_SCAN_IOAPIC. */
+   smp_mb();
+   idx = srcu_read_lock(&kvm->irq_srcu);
+   table = kvm->irq_routing;
+   nr_ioapic_pins = min_t(u32, table->nr_rt_entries,
+  kvm->arch.nr_reserved_ioapic_pins);
+   for (i = 0; i < nr_ioapic_pins; ++i) {
+   hlist_for_each_entry(entry, &table->map[i], link) {
+   u32 dest_id, dest_mode;
+
+   if (entry->type != KVM_IRQ_ROUTING_MSI)
+   continue;
+   dest_id = (entry->msi.address_lo >> 12) & 0xff;
+   dest_mode = (entry->msi.address_lo >> 2) & 0x1;
+   if (kvm_apic_match_dest(vcpu, NULL, 0, dest_id,
+   dest_mode)) {
+   u32 vector = entry->msi.data & 0xff;
+
+   __set_bit(vector,
+ (unsigned long *) eoi_exit_bitmap);
+   }
+   }
+   }
+   srcu_read_unlock(&kvm->irq_srcu, idx);
+}
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm

Re: [PATCH v3 4/4] KVM: x86: Add support for local interrupt requests from userspace

2015-06-25 Thread Steve Rutherford
 However, why is the roundtrip to userspace necessary?  Could you pass
 the extint index directly as an argument to KVM_INTERRUPT?  It's
 backwards-compatible, because KVM_INTERRUPT so far could not be used
 together with an in-kernel LAPIC.  If you could do that, you could also
 avoid the new userspace_extint_available field.

Implemented a basic version of this, and ran into some potential
issues with this strategy. Supporting PIC masking/unmasking by the
CPU/APIC means that either:
A) PIC interrupts need to be bufferable in the kernel (with some way
   of comparing priorities).
B) the APIC state needs to be read in order to fetch the bit as to
   whether or not the PIC is being masked (which I believe can be done
   from userspace via the APIC state ioctl).
C) something hacky that doesn't conform to the PIC spec but still
   happens to boot common OSes (like buffering the interrupts and
   injecting them in the order of arrival (which is wrong)).

Steve
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 4/4] KVM: x86: Add support for local interrupt requests from userspace

2015-06-19 Thread Steve Rutherford
On Wed, Jun 03, 2015 at 11:38:21AM +0200, Paolo Bonzini wrote:
 
 
 On 03/06/2015 01:51, Steve Rutherford wrote:
  In order to enable userspace PIC support, the userspace PIC needs to
  be able to inject local interrupt requests.
  
  This adds the ioctl KVM_REQUEST_PIC_INJECTION and kvm exit
  KVM_EXIT_GET_EXTINT.
  
  The vm ioctl KVM_REQUEST_PIC_INJECTION makes a KVM_REQ_EVENT request
  on the BSP, which causes the BSP to exit to userspace to fetch the
  vector of the underlying external interrupt, which the BSP then
  injects into the guest. This matches the PIC spec, and is necessary to
  boot Windows.
  
  Compiles for x86.
  
  Update: Boots Windows and passes the KVM Unit Tests.
  
  Signed-off-by: Steve Rutherford srutherf...@google.com
  ---
   Documentation/virtual/kvm/api.txt |  9 ++
   arch/x86/include/asm/kvm_host.h   |  2 ++
   arch/x86/kvm/irq.c| 22 +--
   arch/x86/kvm/lapic.c  |  7 +
   arch/x86/kvm/lapic.h  |  2 ++
   arch/x86/kvm/x86.c| 59 
  +--
   include/uapi/linux/kvm.h  |  7 +
   7 files changed, 103 insertions(+), 5 deletions(-)
  
  diff --git a/Documentation/virtual/kvm/api.txt 
  b/Documentation/virtual/kvm/api.txt
  index 6ab2a3f7..b5d90cb 100644
  --- a/Documentation/virtual/kvm/api.txt
  +++ b/Documentation/virtual/kvm/api.txt
  @@ -2979,6 +2979,15 @@ len must be a multiple of sizeof(struct 
  kvm_s390_irq). It must be > 0
   and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq),
   which is the maximum number of possibly pending cpu-local interrupts.
   
  +4.96 KVM_REQUEST_PIC_INJECTION
  +
  +Capability: KVM_CAP_SPLIT_IRQCHIP
  +Type: VM ioctl
  +Parameters: none
  +Returns: 0 on success, -1 on error.
  +
  +Informs the kernel that userspace has a pending external interrupt.
  +
 
 Missing documentation for the new vmexit and kvm_run member.
 
 However, why is the roundtrip to userspace necessary?  Could you pass
 the extint index directly as an argument to KVM_INTERRUPT?  It's
 backwards-compatible, because KVM_INTERRUPT so far could not be used
 together with an in-kernel LAPIC.  If you could do that, you could also
 avoid the new userspace_extint_available field.
 
 Userspace can figure out who's the BSP.  The rendez-vous between the
 irqchip and the BSP's VCPU thread is still needed, but it can be done
 entirely in userspace.
 
 You'd also need much fewer changes to irq.c.  Basically just something like
 
  int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
  {
  int vector;
 
 -if (!irqchip_in_kernel(v->kvm))
 +if (!pic_in_kernel(v->kvm) && v->arch.interrupt.pending)
 return v->arch.interrupt.nr;
 
 ...
 
  int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
  {
 -if (!irqchip_in_kernel(v->kvm))
 +if (!pic_in_kernel(v->kvm))
  return v->arch.interrupt.pending;
 
 ...
 
  int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
  {
 -if (!irqchip_in_kernel(v->kvm))
 +if (!pic_in_kernel(v->kvm))
  return v->arch.interrupt.pending;
 
 More comments below.
 
   5. The kvm_run structure
   
  diff --git a/arch/x86/include/asm/kvm_host.h 
  b/arch/x86/include/asm/kvm_host.h
  index 4f439ff..0e8b0fc 100644
  --- a/arch/x86/include/asm/kvm_host.h
  +++ b/arch/x86/include/asm/kvm_host.h
  @@ -543,6 +543,8 @@ struct kvm_vcpu_arch {
   
  u64 eoi_exit_bitmaps[4];
  int pending_ioapic_eoi;
  +   bool userspace_extint_available;
  +   int pending_external_vector;
   };
   
 
   struct kvm_lpage_info {
  diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
  index 706e47a..1270b2a 100644
  --- a/arch/x86/kvm/irq.c
  +++ b/arch/x86/kvm/irq.c
  @@ -38,12 +38,25 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
   EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
   
   /*
  + * check if there is a pending userspace external interrupt
  + */
  +static int pending_userspace_extint(struct kvm_vcpu *v)
  +{
  +   return v->arch.userspace_extint_available ||
  +  v->arch.pending_external_vector != -1;
  +}
  +
  +/*
* check if there is pending interrupt from
* non-APIC source without intack.
*/
   static int kvm_cpu_has_extint(struct kvm_vcpu *v)
   {
  -   if (kvm_apic_accept_pic_intr(v))
  +   u8 accept = kvm_apic_accept_pic_intr(v);
  +
  +   if (accept && irqchip_split(v->kvm))
  +   return pending_userspace_extint(v);
  +   else if (accept)
  return pic_irqchip(v->kvm)->output; /* PIC */
 
   if (accept) {
   if (irqchip_split(v->kvm))
   return pending_userspace_extint(v);
   else
   return pic_irqchip(v->kvm)->output;
   }
 
   return 0;
 
  else
  return 0;
  @@ -91,7 +104,12 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
*/
   static int kvm_cpu_get_extint(struct kvm_vcpu *v)
   {
  -   if (kvm_cpu_has_extint(v

[PATCH v4 4/4] KVM: x86: Add support for local interrupt requests from userspace

2015-06-15 Thread Steve Rutherford
In order to enable userspace PIC support, the userspace PIC needs to
be able to inject local interrupt requests.

This adds the ioctl KVM_REQUEST_PIC_INJECTION and kvm exit
KVM_EXIT_GET_EXTINT.

The vm ioctl KVM_REQUEST_PIC_INJECTION makes a KVM_REQ_EVENT request
on the BSP, which causes the BSP to exit to userspace to fetch the
vector of the underlying external interrupt, which the BSP then
injects into the guest. This matches the PIC spec, and is necessary to
boot Windows.

Compiles for x86.

Update: Boots Windows and passes the KVM Unit Tests.

Signed-off-by: Steve Rutherford srutherf...@google.com
---
 Documentation/virtual/kvm/api.txt | 21 +++
 arch/x86/include/asm/kvm_host.h   |  2 ++
 arch/x86/kvm/irq.c| 22 +--
 arch/x86/kvm/lapic.c  |  7 +
 arch/x86/kvm/lapic.h  |  2 ++
 arch/x86/kvm/x86.c| 57 ---
 include/uapi/linux/kvm.h  | 11 
 7 files changed, 117 insertions(+), 5 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index e5e3e94..b2e48a0 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2979,6 +2979,19 @@ len must be a multiple of sizeof(struct kvm_s390_irq). 
It must be > 0
 and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq),
 which is the maximum number of possibly pending cpu-local interrupts.
 
+4.96 KVM_REQUEST_PIC_INJECTION
+
+Capability: KVM_CAP_SPLIT_IRQCHIP
+Type: VM ioctl
+Parameters: struct kvm_pic_injection (in)
+Returns: 0 on success, -1 on error.
+
+Informs the kernel that userspace has a pending external interrupt for
+the specified cpu.
+
+struct kvm_pic_injection {
+   __u32 cpu;
+};
 
 5. The kvm_run structure
 
@@ -3270,6 +3283,14 @@ the userspace IOAPIC should process the EOI and 
retrigger the interrupt if
 it is still asserted.  Vector is the LAPIC interrupt vector for which the
 EOI was received.
 
+   /* KVM_EXIT_GET_EXTINT */
+   struct {
+   __u8 vector;
+   } extint;
+Used when a VCPU needs to exit to userspace to fetch an external interrupt
+vector from a userspace PIC (which is necessary when KVM_CAP_SPLIT_IRQCHIP
+is enabled). The vector should be stored in the exit struct upon reentry.
+
/* Fix the size of the union. */
char padding[256];
};
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4f439ff..0e8b0fc 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -543,6 +543,8 @@ struct kvm_vcpu_arch {
 
u64 eoi_exit_bitmaps[4];
int pending_ioapic_eoi;
+   bool userspace_extint_available;
+   int pending_external_vector;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index a1ec6a50..e9ecade 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -38,12 +38,25 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
 
 /*
+ * check if there is a pending userspace external interrupt
+ */
+static int pending_userspace_extint(struct kvm_vcpu *v)
+{
+   return v->arch.userspace_extint_available ||
+  v->arch.pending_external_vector != -1;
+}
+
+/*
  * check if there is pending interrupt from
  * non-APIC source without intack.
  */
 static int kvm_cpu_has_extint(struct kvm_vcpu *v)
 {
-   if (kvm_apic_accept_pic_intr(v))
+   u8 accept = kvm_apic_accept_pic_intr(v);
+
+   if (accept && irqchip_split(v->kvm))
+   return pending_userspace_extint(v);
+   else if (accept)
return pic_irqchip(v->kvm)->output; /* PIC */
else
return 0;
@@ -91,7 +104,12 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
  */
 static int kvm_cpu_get_extint(struct kvm_vcpu *v)
 {
-   if (kvm_cpu_has_extint(v))
+   if (irqchip_split(v->kvm) && kvm_cpu_has_extint(v)) {
+   int vector = v->arch.pending_external_vector;
+
+   v->arch.pending_external_vector = -1;
+   return vector;
+   } else if (kvm_cpu_has_extint(v))
return kvm_pic_read_irq(v->kvm); /* PIC */
return -1;
 }
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 45be02b..932ab94 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2094,3 +2094,10 @@ void kvm_lapic_init(void)
jump_label_rate_limit(apic_hw_disabled, HZ);
jump_label_rate_limit(apic_sw_disabled, HZ);
 }
+
+void kvm_request_pic_injection(struct kvm_vcpu *vcpu)
+{
+   vcpu->arch.userspace_extint_available = true;
+   kvm_make_request(KVM_REQ_EVENT, vcpu);
+   kvm_vcpu_kick(vcpu);
+}
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 71b150c..7831e4d 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -63,6 +63,8 @@ int kvm_apic_set_irq

[PATCH v4 2/4] KVM: x86: Add KVM exit for IOAPIC EOIs

2015-06-15 Thread Steve Rutherford
Adds KVM_EXIT_IOAPIC_EOI which allows the kernel to EOI
level-triggered IOAPIC interrupts.

Uses a per VCPU exit bitmap to decide whether or not the IOAPIC needs
to be informed (which is identical to the EOI_EXIT_BITMAP field used
by modern x86 processors, but can also be used to elide kvm IOAPIC EOI
exits on older processors).

[Note: A prototype using ResampleFDs found that decoupling the EOI
from the VCPU's thread made it possible for the VCPU to not see a
recent EOI after reentering the guest. This does not match real
hardware.]

Compile tested for Intel x86.

Signed-off-by: Steve Rutherford srutherf...@google.com
---
 Documentation/virtual/kvm/api.txt | 13 +
 arch/x86/include/asm/kvm_host.h   |  3 +++
 arch/x86/kvm/lapic.c  |  9 +
 arch/x86/kvm/x86.c| 11 +++
 include/linux/kvm_host.h  |  1 +
 include/uapi/linux/kvm.h  |  5 +
 6 files changed, 42 insertions(+)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index 3143f9e..ec0cb19 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3258,6 +3258,18 @@ Valid values for 'type' are:
As with SHUTDOWN, userspace can choose to ignore the request, or
to schedule the reset to occur in the future and may call KVM_RUN again.
 
+   /* KVM_EXIT_IOAPIC_EOI */
+struct {
+  __u8 vector;
+} eoi;
+
+Indicates that the VCPU's in-kernel local APIC received an EOI for a
+level-triggered IOAPIC interrupt.  This exit only triggers when the
+IOAPIC is implemented in userspace (i.e. KVM_CAP_SPLIT_IRQCHIP is enabled);
+the userspace IOAPIC should process the EOI and retrigger the interrupt if
+it is still asserted.  Vector is the LAPIC interrupt vector for which the
+EOI was received.
+
/* Fix the size of the union. */
char padding[256];
};
@@ -3271,6 +3283,7 @@ Valid values for 'type' are:
 */
__u64 kvm_valid_regs;
__u64 kvm_dirty_regs;
+
union {
struct kvm_sync_regs regs;
char padding[1024];
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index af3225a..2778d36 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -540,6 +540,9 @@ struct kvm_vcpu_arch {
struct {
bool pv_unhalted;
} pv;
+
+   u64 eoi_exit_bitmaps[4];
+   int pending_ioapic_eoi;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 717b4e2..e2ae4b4 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -869,6 +869,15 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct 
kvm_vcpu *vcpu2)
 
 static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
 {
+   if (irqchip_split(apic->vcpu->kvm)) {
+   if (test_bit(vector,
+(void *) apic->vcpu->arch.eoi_exit_bitmaps)) {
+   apic->vcpu->arch.pending_ioapic_eoi = vector;
+   kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu);
+   }
+   return;
+   }
+
if (kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
int trigger_mode;
if (apic_test_vector(vector, apic->regs + APIC_TMR))
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1cc6a65..3b05c01 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6392,6 +6392,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_handle_pmu_event(vcpu);
if (kvm_check_request(KVM_REQ_PMI, vcpu))
kvm_deliver_pmi(vcpu);
+   if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) {
+   BUG_ON(vcpu->arch.pending_ioapic_eoi > 255);
+   if (test_bit(vcpu->arch.pending_ioapic_eoi,
+(void *) vcpu->arch.eoi_exit_bitmaps)) {
+   vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI;
+   vcpu->run->eoi.vector =
+   vcpu->arch.pending_ioapic_eoi;
+   r = 0;
+   goto out;
+   }
+   }
if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
vcpu_scan_ioapic(vcpu);
if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7e2b41a..c6df36f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -134,6 +134,7 @@ static inline bool is_error_page(struct page *page)
 #define KVM_REQ_ENABLE_IBS23
 #define KVM_REQ_DISABLE_IBS   24
 #define KVM_REQ_APIC_PAGE_RELOAD  25
+#define KVM_REQ_IOAPIC_EOI_EXIT   26
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID0
 #define

[PATCH v4 1/4] KVM: x86: Split the APIC from the rest of IRQCHIP.

2015-06-15 Thread Steve Rutherford
First patch in a series which enables the relocation of the
PIC/IOAPIC to userspace.

Adds capability KVM_CAP_SPLIT_IRQCHIP;

KVM_CAP_SPLIT_IRQCHIP enables the construction of LAPICs without the
rest of the irqchip.

Compile tested for x86.

Signed-off-by: Steve Rutherford srutherf...@google.com
Suggested-by: Andrew Honig aho...@google.com
---
 Documentation/virtual/kvm/api.txt | 16 
 arch/powerpc/kvm/irq.h|  1 -
 arch/s390/kvm/irq.h   |  1 -
 arch/x86/include/asm/kvm_host.h   |  2 ++
 arch/x86/kvm/i8254.c  |  5 -
 arch/x86/kvm/ioapic.h |  9 +
 arch/x86/kvm/irq.h|  6 ++
 arch/x86/kvm/irq_comm.c   |  9 -
 arch/x86/kvm/lapic.c  |  9 ++---
 arch/x86/kvm/vmx.c|  4 ++--
 arch/x86/kvm/x86.c| 23 +--
 include/kvm/arm_vgic.h|  1 +
 include/linux/kvm_host.h  |  1 +
 include/uapi/linux/kvm.h  |  1 +
 14 files changed, 77 insertions(+), 11 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index 6955444..3143f9e 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2979,6 +2979,7 @@ len must be a multiple of sizeof(struct kvm_s390_irq). It 
must be > 0
 and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq),
 which is the maximum number of possibly pending cpu-local interrupts.
 
+
 5. The kvm_run structure
 
 
@@ -3575,6 +3576,21 @@ struct {
 
 KVM handlers should exit to userspace with rc = -EREMOTE.
 
+7.5 KVM_SPLIT_IRQCHIP
+
+Architectures: x86
+Parameters: None
+Returns: 0 on success, -1 on error
+
+Create a local apic for each processor in the kernel. With this capability
+enabled, the userspace VMM is expected to emulate the IOAPIC and PIC.
+
+This supersedes KVM_CREATE_IRQCHIP, creating only local APICs, but no in kernel
+IOAPIC or PIC. This also enables in kernel routing of interrupt requests.
+
+Fails if VCPU has already been created, or if the irqchip is already in the
+kernel (i.e. KVM_CREATE_IRQCHIP has already been called).
+
 
 8. Other capabilities.
 --
diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h
index 5a9a10b..772fa8c 100644
--- a/arch/powerpc/kvm/irq.h
+++ b/arch/powerpc/kvm/irq.h
@@ -16,5 +16,4 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
smp_rmb();
return ret;
 }
-
 #endif
diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h
index d98e415..9a21a86 100644
--- a/arch/s390/kvm/irq.h
+++ b/arch/s390/kvm/irq.h
@@ -18,5 +18,4 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
 {
return 1;
 }
-
 #endif
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7276107..af3225a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -639,6 +639,8 @@ struct kvm_arch {
bool boot_vcpu_runs_old_kvmclock;
 
u64 disabled_quirks;
+
+   bool irqchip_split;
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 4dce6f8..de06e7e 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -35,6 +35,7 @@
 #include <linux/kvm_host.h>
 #include <linux/slab.h>
 
+#include "ioapic.h"
 #include "irq.h"
 #include "i8254.h"
 #include "x86.h"
@@ -333,7 +334,9 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int 
is_period)
struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
s64 interval;
 
-   if (!irqchip_in_kernel(kvm) || ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)
+   if (!irqchip_in_kernel(kvm) ||
+   !ioapic_in_kernel(kvm) ||
+   ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)
return;
 
interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index ca0b0b4..d8cc54b 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -98,6 +98,15 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm 
*kvm)
return kvm->arch.vioapic;
 }
 
+static inline int ioapic_in_kernel(struct kvm *kvm)
+{
+   int ret;
+
+   ret = (ioapic_irqchip(kvm) != NULL);
+   smp_rmb();
+   return ret;
+}
+
 static inline bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector)
 {
struct kvm_ioapic *ioapic = kvm->arch.vioapic;
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index ad68c73..2f13dd5 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -83,11 +83,17 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
return kvm->arch.vpic;
 }
 
+static inline int irqchip_split(struct kvm *kvm)
+{
+   return kvm->arch.irqchip_split;
+}
+
 static inline int irqchip_in_kernel(struct kvm *kvm)
 {
int ret;
 
ret = (pic_irqchip(kvm) != NULL);
+   ret |= irqchip_split(kvm);
smp_rmb();
return ret;
 }
diff --git a/arch/x86/kvm/irq_comm.c b

[PATCH v4 3/4] KVM: x86: Add EOI exit bitmap inference

2015-06-15 Thread Steve Rutherford
In order to support a userspace IOAPIC interacting with an in kernel
APIC, the EOI exit bitmaps need to be configurable.

If the IOAPIC is in userspace (i.e. the irqchip has been split), the
EOI exit bitmaps will be set whenever the GSI Routes are configured.
In particular, the low MSI routes are reservable for userspace
IOAPICs. For these MSI routes, the EOI Exit bit corresponding to the
destination vector of the route will be set for the destination VCPU.

The intention is for the userspace IOAPICs to use the reservable MSI
routes to inject interrupts into the guest.

This is a slight abuse of the notion of an MSI Route, given that MSIs
classically bypass the IOAPIC. It might be worthwhile to add an
additional route type to improve clarity.

Compile tested for Intel x86.

Signed-off-by: Steve Rutherford srutherf...@google.com
---
 Documentation/virtual/kvm/api.txt |  8 
 arch/x86/include/asm/kvm_host.h   |  1 +
 arch/x86/kvm/ioapic.h |  2 ++
 arch/x86/kvm/irq_comm.c   | 42 +++
 arch/x86/kvm/lapic.c  |  3 +--
 arch/x86/kvm/x86.c| 29 +--
 include/linux/kvm_host.h  | 21 +++-
 virt/kvm/irqchip.c| 13 ++--
 8 files changed, 91 insertions(+), 28 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index ec0cb19..e5e3e94 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3258,10 +3258,10 @@ Valid values for 'type' are:
As with SHUTDOWN, userspace can choose to ignore the request, or
to schedule the reset to occur in the future and may call KVM_RUN again.
 
-   /* KVM_EXIT_IOAPIC_EOI */
-struct {
-  __u8 vector;
-} eoi;
+   /* KVM_EXIT_IOAPIC_EOI */
+   struct {
+   __u8 vector;
+   } eoi;
 
 Indicates that the VCPU's in-kernel local APIC received an EOI for a
 level-triggered IOAPIC interrupt.  This exit only triggers when the
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2778d36..4f439ff 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -644,6 +644,7 @@ struct kvm_arch {
u64 disabled_quirks;
 
bool irqchip_split;
+   u8 nr_reserved_ioapic_pins;
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index d8cc54b..f6ce112 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -9,6 +9,7 @@ struct kvm;
 struct kvm_vcpu;
 
 #define IOAPIC_NUM_PINS  KVM_IOAPIC_NUM_PINS
+#define MAX_NR_RESERVED_IOAPIC_PINS 48
 #define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */
 #define IOAPIC_EDGE_TRIG  0
 #define IOAPIC_LEVEL_TRIG 1
@@ -132,4 +133,5 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state 
*state);
 void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
u32 *tmr);
 
+void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
 #endif
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 67f6b62..da4827f 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -335,3 +335,45 @@ int kvm_setup_empty_irq_routing(struct kvm *kvm)
 {
return kvm_set_irq_routing(kvm, empty_routing, 0, 0);
 }
+
+void kvm_arch_irq_routing_update(struct kvm *kvm)
+{
+   if (ioapic_in_kernel(kvm) || !irqchip_in_kernel(kvm))
+   return;
+   kvm_make_scan_ioapic_request(kvm);
+}
+
+void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
+{
+   struct kvm *kvm = vcpu->kvm;
+   struct kvm_kernel_irq_routing_entry *entry;
+   struct kvm_irq_routing_table *table;
+   u32 i, nr_ioapic_pins;
+   int idx;
+
+   /* kvm->irq_routing must be read after clearing
+* KVM_SCAN_IOAPIC. */
+   smp_mb();
+   idx = srcu_read_lock(&kvm->irq_srcu);
+   table = kvm->irq_routing;
+   nr_ioapic_pins = min_t(u32, table->nr_rt_entries,
+  kvm->arch.nr_reserved_ioapic_pins);
+   for (i = 0; i < nr_ioapic_pins; ++i) {
+   hlist_for_each_entry(entry, &table->map[i], link) {
+   u32 dest_id, dest_mode;
+
+   if (entry->type != KVM_IRQ_ROUTING_MSI)
+   continue;
+   dest_id = (entry->msi.address_lo >> 12) & 0xff;
+   dest_mode = (entry->msi.address_lo >> 2) & 0x1;
+   if (kvm_apic_match_dest(vcpu, NULL, 0, dest_id,
+   dest_mode)) {
+   u32 vector = entry->msi.data & 0xff;
+
+   __set_bit(vector,
+ (unsigned long *) eoi_exit_bitmap);
+   }
+   }
+   }
+   srcu_read_unlock(&kvm->irq_srcu, idx

Re: [PATCH v3 3/4] KVM: x86: Add EOI exit bitmap inference

2015-06-04 Thread Steve Rutherford
On Wed, Jun 03, 2015 at 11:16:16AM +0200, Paolo Bonzini wrote:
 
 
 On 03/06/2015 01:51, Steve Rutherford wrote:
  +static inline void kvm_arch_irq_routing_update(struct kvm *kvm)
  +{
  +}
 
 Please add the static inline to all arches instead of putting it in
 #ifndef __KVM_HAVE_IOAPIC.  It's not related to the existence of an ioapic.
 
  
  +void kvm_arch_irq_routing_update(struct kvm *kvm)
  +{
  +   struct kvm_ioapic *ioapic = kvm-arch.vioapic;
  +
  +   if (ioapic)
  +   return;
  +   if (!lapic_in_kernel(kvm))
  +   return;
  +   kvm_make_scan_ioapic_request(kvm);
  +}
  +
 
 It's weird to have a function in ioapic.c that only does something if
 you _do not_ have an ioapic. :)
 
  +
  +void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
 
 This must stay in arch/x86/kvm/.  I'd put both of these in irq_comm.c.
 
 Then you do not need kvm_arch_nr_userspace_ioapic_pins anymore.
Yes! This is way cleaner. I'll make these changes.

Steve
 
 Paolo
 
  +{
  +   struct kvm *kvm = vcpu->kvm;
  +   struct kvm_kernel_irq_routing_entry *entry;
  +   struct kvm_irq_routing_table *table;
  +   u32 i, nr_ioapic_pins;
  +   int idx;
  +
  +   /* kvm->irq_routing must be read after clearing
  +* KVM_SCAN_IOAPIC. */
  +   smp_mb();
  +   idx = srcu_read_lock(&kvm->irq_srcu);
  +   table = kvm->irq_routing;
  +   nr_ioapic_pins = min_t(u32, table->nr_rt_entries,
  + kvm_arch_nr_userspace_ioapic_pins(kvm));
  +   for (i = 0; i < nr_ioapic_pins; ++i) {
  +   hlist_for_each_entry(entry, &table->map[i], link) {
  +   u32 dest_id, dest_mode;
  +
  +   if (entry->type != KVM_IRQ_ROUTING_MSI)
  +   continue;
  +   dest_id = (entry->msi.address_lo >> 12) & 0xff;
  +   dest_mode = (entry->msi.address_lo >> 2) & 0x1;
  +   if (kvm_apic_match_dest(vcpu, NULL, 0, dest_id,
  +   dest_mode)) {
  +   u32 vector = entry->msi.data & 0xff;
  +
  +   __set_bit(vector,
  + (unsigned long *) eoi_exit_bitmap);
  +   }
  +   }
  +   }
  +   srcu_read_unlock(&kvm->irq_srcu, idx);
  +}
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 1/4] KVM: x86: Split the APIC from the rest of IRQCHIP.

2015-06-04 Thread Steve Rutherford
On Wed, Jun 03, 2015 at 10:54:41AM +0200, Paolo Bonzini wrote:
 
 
 On 03/06/2015 01:51, Steve Rutherford wrote:
  First patch in a series which enables the relocation of the
  PIC/IOAPIC to userspace.
  
  Adds capability KVM_CAP_SPLIT_IRQCHIP;
  
  KVM_CAP_SPLIT_IRQCHIP enables the construction of LAPICs without the
  rest of the irqchip.
 
 The documentation is not updated.
Ack.
 
 Changing other arches is definitely a no-no, unfortunately.  But there
 are so many s/irqchip_in_kernel/lapic_in_kernel/ changes here, that I
 wonder if you should just keep irqchip_in_kernel true in the split
 irqchip case.  You are already testing irqchip_split in a few cases,
 and you can add ioapic_in_kernel whenever you need to test
 lapic_in_kernel && !irqchip_split at the same time.

From the perspective of avoiding impacting other architectures, this is a
good idea, but the naming seems strange in the x86 case. Having
irqchip_in_kernel be true when the ioapic/pic are in userspace seems
strange. Admittedly, the irqchip isn't a real concept on x86, so
inventing a new meaning is fine.

Despite my hesitation, I'll change the naming around.

Steve

 
 Paolo
 
  Compile tested for x86.
  
  Signed-off-by: Steve Rutherford srutherf...@google.com
  Suggested-by: Andrew Honig aho...@google.com
  ---
   Documentation/virtual/kvm/api.txt | 15 
   arch/powerpc/kvm/irq.h|  5 
   arch/s390/kvm/irq.h   |  4 
   arch/x86/include/asm/kvm_host.h   |  2 ++
   arch/x86/kvm/assigned-dev.c   |  4 ++--
   arch/x86/kvm/irq.c|  6 ++---
   arch/x86/kvm/irq.h| 11 +
   arch/x86/kvm/irq_comm.c   |  7 ++
   arch/x86/kvm/lapic.c  | 13 +++
   arch/x86/kvm/mmu.c|  2 +-
   arch/x86/kvm/svm.c|  4 ++--
   arch/x86/kvm/vmx.c| 12 +-
   arch/x86/kvm/x86.c| 49 
  +++
   include/kvm/arm_vgic.h|  1 +
   include/linux/kvm_host.h  |  1 +
   include/uapi/linux/kvm.h  |  1 +
   virt/kvm/irqchip.c|  2 +-
   17 files changed, 104 insertions(+), 35 deletions(-)
  
  diff --git a/Documentation/virtual/kvm/api.txt 
  b/Documentation/virtual/kvm/api.txt
  index 6955444..9a43d42 100644
  --- a/Documentation/virtual/kvm/api.txt
  +++ b/Documentation/virtual/kvm/api.txt
  @@ -2979,6 +2979,7 @@ len must be a multiple of sizeof(struct 
  kvm_s390_irq). It must be > 0
   and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq),
   which is the maximum number of possibly pending cpu-local interrupts.
   
  +
   5. The kvm_run structure
   
   
  @@ -3575,6 +3576,20 @@ struct {
   
   KVM handlers should exit to userspace with rc = -EREMOTE.
   
  +7.5 KVM_SPLIT_IRQCHIP
  +
  +Capability: KVM_CAP_SPLIT_IRQCHIP
  +Architectures: x86
  +Type:  VM ioctl
  +Parameters: None
  +Returns: 0 on success, -1 on error
  +
  +Create a local apic for each processor in the kernel.  This differs from
  +KVM_CREATE_IRQCHIP in that it only creates the local apic; it creates 
  neither
  +the ioapic nor the pic in the kernel. Also, enables in kernel routing of
  +interrupt requests. Fails if VCPU has already been created, or if the 
  irqchip is
  +already in the kernel.
  +
   
   8. Other capabilities.
   --
  diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h
  index 5a9a10b..5e6fa06 100644
  --- a/arch/powerpc/kvm/irq.h
  +++ b/arch/powerpc/kvm/irq.h
  @@ -17,4 +17,9 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
  return ret;
   }
   
  +static inline int lapic_in_kernel(struct kvm *kvm)
  +{
  +   return irqchip_in_kernel(kvm);
  +}
  +
   #endif
  diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h
  index d98e415..db876c3 100644
  --- a/arch/s390/kvm/irq.h
  +++ b/arch/s390/kvm/irq.h
  @@ -19,4 +19,8 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
  return 1;
   }
   
  +static inline int lapic_in_kernel(struct kvm *kvm)
  +{
  +   return irqchip_in_kernel(kvm);
  +}
   #endif
  diff --git a/arch/x86/include/asm/kvm_host.h 
  b/arch/x86/include/asm/kvm_host.h
  index 7276107..af3225a 100644
  --- a/arch/x86/include/asm/kvm_host.h
  +++ b/arch/x86/include/asm/kvm_host.h
  @@ -639,6 +639,8 @@ struct kvm_arch {
  bool boot_vcpu_runs_old_kvmclock;
   
  u64 disabled_quirks;
  +
  +   bool irqchip_split;
   };
   
   struct kvm_vm_stat {
  diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c
  index d090ecf..1237e92 100644
  --- a/arch/x86/kvm/assigned-dev.c
  +++ b/arch/x86/kvm/assigned-dev.c
  @@ -291,7 +291,7 @@ static int kvm_deassign_irq(struct kvm *kvm,
   {
  unsigned long guest_irq_type, host_irq_type;
   
  -   if (!irqchip_in_kernel(kvm))
  +   if (!lapic_in_kernel(kvm))
  return -EINVAL;
  /* no irq assignment to deassign */
  if (!assigned_dev-irq_requested_type

Re: [PATCH v3 4/4] KVM: x86: Add support for local interrupt requests from userspace

2015-06-04 Thread Steve Rutherford
On Wed, Jun 03, 2015 at 11:38:21AM +0200, Paolo Bonzini wrote:
 
 However, why is the roundtrip to userspace necessary?  Could you pass
 the extint index directly as an argument to KVM_INTERRUPT?  It's
 backwards-compatible, because KVM_INTERRUPT so far could not be used
 together with an in-kernel LAPIC.  If you could do that, you could also
 avoid the new userspace_extint_available field.
This is possible, and definitely simpler, but not accurate to the spec.
In general, the PIC fires an INT, which leads to the CPU responding with
an INTA, and fetching the interrupt vector. It might not be strictly
necessary for this handshake to occur, but it is how the hardware did it
originally. 

In certain cases, having the interface modelled after the hardware is 
convenient. For example, devices can send external interrupt MSIs,
which require an Interrupt Ack to fetch the vector. They're a bit weird,
and I have absolutely no idea why someone would want these, but they are
definitely a thing.

Looking back at KVM though, it doesn't look like KVM even supports these,
so this may not be a real issue. Eliding the roundtrip might be acceptable.
It's certainly simpler.

 Userspace can figure out who's the BSP.  The rendez-vous between the
 irqchip and the BSP's VCPU thread is still needed, but it can be done
 entirely in userspace.
Good point. I'll push this up into userspace. 
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Update on Split Irqchip Patches

2015-06-02 Thread Steve Rutherford
Hi All,

I just sent out a new version of the patches that enable a split
irqchip. I've tested them against Google's VMM, and the updated
patches boot Windows and pass the KVM unit tests. It's mostly the
same as before, with the tweaks suggested against the first version
I sent out.

A new Google Intern (Andrew Liu) is currently looking into patching
QEMU to work with this patch set, which is pretty exciting.

Are there any changes/updates to this patch set that are necessary
before it could be merged?

Cheers,
Steve
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 1/4] KVM: x86: Split the APIC from the rest of IRQCHIP.

2015-06-02 Thread Steve Rutherford
First patch in a series which enables the relocation of the
PIC/IOAPIC to userspace.

Adds capability KVM_CAP_SPLIT_IRQCHIP;

KVM_CAP_SPLIT_IRQCHIP enables the construction of LAPICs without the
rest of the irqchip.

Compile tested for x86.

Signed-off-by: Steve Rutherford srutherf...@google.com
Suggested-by: Andrew Honig aho...@google.com
---
 Documentation/virtual/kvm/api.txt | 15 
 arch/powerpc/kvm/irq.h|  5 
 arch/s390/kvm/irq.h   |  4 
 arch/x86/include/asm/kvm_host.h   |  2 ++
 arch/x86/kvm/assigned-dev.c   |  4 ++--
 arch/x86/kvm/irq.c|  6 ++---
 arch/x86/kvm/irq.h| 11 +
 arch/x86/kvm/irq_comm.c   |  7 ++
 arch/x86/kvm/lapic.c  | 13 +++
 arch/x86/kvm/mmu.c|  2 +-
 arch/x86/kvm/svm.c|  4 ++--
 arch/x86/kvm/vmx.c| 12 +-
 arch/x86/kvm/x86.c| 49 +++
 include/kvm/arm_vgic.h|  1 +
 include/linux/kvm_host.h  |  1 +
 include/uapi/linux/kvm.h  |  1 +
 virt/kvm/irqchip.c|  2 +-
 17 files changed, 104 insertions(+), 35 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index 6955444..9a43d42 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2979,6 +2979,7 @@ len must be a multiple of sizeof(struct kvm_s390_irq). It 
must be > 0
 and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq),
 which is the maximum number of possibly pending cpu-local interrupts.
 
+
 5. The kvm_run structure
 
 
@@ -3575,6 +3576,20 @@ struct {
 
 KVM handlers should exit to userspace with rc = -EREMOTE.
 
+7.5 KVM_SPLIT_IRQCHIP
+
+Capability: KVM_CAP_SPLIT_IRQCHIP
+Architectures: x86
+Type:  VM ioctl
+Parameters: None
+Returns: 0 on success, -1 on error
+
+Create a local apic for each processor in the kernel.  This differs from
+KVM_CREATE_IRQCHIP in that it only creates the local apic; it creates neither
+the ioapic nor the pic in the kernel. Also, enables in kernel routing of
+interrupt requests. Fails if VCPU has already been created, or if the irqchip 
is
+already in the kernel.
+
 
 8. Other capabilities.
 --
diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h
index 5a9a10b..5e6fa06 100644
--- a/arch/powerpc/kvm/irq.h
+++ b/arch/powerpc/kvm/irq.h
@@ -17,4 +17,9 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
return ret;
 }
 
+static inline int lapic_in_kernel(struct kvm *kvm)
+{
+   return irqchip_in_kernel(kvm);
+}
+
 #endif
diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h
index d98e415..db876c3 100644
--- a/arch/s390/kvm/irq.h
+++ b/arch/s390/kvm/irq.h
@@ -19,4 +19,8 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
return 1;
 }
 
+static inline int lapic_in_kernel(struct kvm *kvm)
+{
+   return irqchip_in_kernel(kvm);
+}
 #endif
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7276107..af3225a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -639,6 +639,8 @@ struct kvm_arch {
bool boot_vcpu_runs_old_kvmclock;
 
u64 disabled_quirks;
+
+   bool irqchip_split;
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c
index d090ecf..1237e92 100644
--- a/arch/x86/kvm/assigned-dev.c
+++ b/arch/x86/kvm/assigned-dev.c
@@ -291,7 +291,7 @@ static int kvm_deassign_irq(struct kvm *kvm,
 {
unsigned long guest_irq_type, host_irq_type;
 
-   if (!irqchip_in_kernel(kvm))
+   if (!lapic_in_kernel(kvm))
return -EINVAL;
/* no irq assignment to deassign */
if (!assigned_dev-irq_requested_type)
@@ -568,7 +568,7 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
struct kvm_assigned_dev_kernel *match;
unsigned long host_irq_type, guest_irq_type;
 
-   if (!irqchip_in_kernel(kvm))
+   if (!lapic_in_kernel(kvm))
return r;
 
mutex_lock(kvm-lock);
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index a1ec6a50..706e47a 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -57,7 +57,7 @@ static int kvm_cpu_has_extint(struct kvm_vcpu *v)
  */
 int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
 {
-   if (!irqchip_in_kernel(v-kvm))
+   if (!lapic_in_kernel(v-kvm))
return v-arch.interrupt.pending;
 
if (kvm_cpu_has_extint(v))
@@ -75,7 +75,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
  */
 int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
 {
-   if (!irqchip_in_kernel(v-kvm))
+   if (!lapic_in_kernel(v-kvm))
return v-arch.interrupt.pending;
 
if (kvm_cpu_has_extint(v))
@@ -103,7 +103,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
 {
int vector

[PATCH v3 3/4] KVM: x86: Add EOI exit bitmap inference

2015-06-02 Thread Steve Rutherford
In order to support a userspace IOAPIC interacting with an in kernel
APIC, the EOI exit bitmaps need to be configurable.

If the IOAPIC is in userspace (i.e. the irqchip has been split), the
EOI exit bitmaps will be set whenever the GSI Routes are configured.
In particular, the low MSI routes are reservable for userspace
IOAPICs. For these MSI routes, the EOI Exit bit corresponding to the
destination vector of the route will be set for the destination VCPU.

The intention is for the userspace IOAPICs to use the reservable MSI
routes to inject interrupts into the guest.

This is a slight abuse of the notion of an MSI Route, given that MSIs
classically bypass the IOAPIC. It might be worthwhile to add an
additional route type to improve clarity.

Compile tested for Intel x86.

Signed-off-by: Steve Rutherford srutherf...@google.com
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/ioapic.c   | 16 
 arch/x86/kvm/ioapic.h   |  2 ++
 arch/x86/kvm/lapic.c|  3 +--
 arch/x86/kvm/x86.c  | 30 ++
 include/linux/kvm_host.h|  9 +
 virt/kvm/irqchip.c  | 37 +
 7 files changed, 88 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2778d36..4f439ff 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -644,6 +644,7 @@ struct kvm_arch {
u64 disabled_quirks;
 
bool irqchip_split;
+   u8 nr_reserved_ioapic_pins;
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 856f791..fb5281b 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -672,3 +672,19 @@ int kvm_set_ioapic(struct kvm *kvm, struct 
kvm_ioapic_state *state)
spin_unlock(&ioapic->lock);
return 0;
 }
+
+void kvm_arch_irq_routing_update(struct kvm *kvm)
+{
+   struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+
+   if (ioapic)
+   return;
+   if (!lapic_in_kernel(kvm))
+   return;
+   kvm_make_scan_ioapic_request(kvm);
+}
+
+u8 kvm_arch_nr_userspace_ioapic_pins(struct kvm *kvm)
+{
+   return kvm->arch.nr_reserved_ioapic_pins;
+}
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index ca0b0b4..3af349c 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -9,6 +9,7 @@ struct kvm;
 struct kvm_vcpu;
 
 #define IOAPIC_NUM_PINS  KVM_IOAPIC_NUM_PINS
+#define MAX_NR_RESERVED_IOAPIC_PINS 48
 #define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */
 #define IOAPIC_EDGE_TRIG  0
 #define IOAPIC_LEVEL_TRIG 1
@@ -123,4 +124,5 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state 
*state);
 void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
u32 *tmr);
 
+void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
 #endif
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 28eb946..766d297 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -209,8 +209,7 @@ out:
if (old)
kfree_rcu(old, rcu);
 
-   if (!irqchip_split(kvm))
-   kvm_vcpu_request_scan_ioapic(kvm);
+   kvm_make_scan_ioapic_request(kvm);
 }
 
 static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5e01810..35d13d4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3930,15 +3930,20 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
case KVM_CAP_SPLIT_IRQCHIP: {
mutex_lock(&kvm->lock);
r = -EEXIST;
-   if (lapic_in_kernel(kvm))
+   if (irqchip_in_kernel(kvm))
goto split_irqchip_unlock;
r = -EINVAL;
-   if (atomic_read(&kvm->online_vcpus))
-   goto split_irqchip_unlock;
-   r = kvm_setup_empty_irq_routing(kvm);
-   if (r)
+   if (cap->args[0] > MAX_NR_RESERVED_IOAPIC_PINS)
goto split_irqchip_unlock;
-   kvm->arch.irqchip_split = true;
+   if (!irqchip_split(kvm)) {
+   if (atomic_read(&kvm->online_vcpus))
+   goto split_irqchip_unlock;
+   r = kvm_setup_empty_irq_routing(kvm);
+   if (r)
+   goto split_irqchip_unlock;
+   kvm->arch.irqchip_split = true;
+   }
+   kvm->arch.nr_reserved_ioapic_pins = cap->args[0];
r = 0;
 split_irqchip_unlock:
mutex_unlock(&kvm->lock);
@@ -6403,8 +6408,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
goto out;
}
}
-   if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
-   vcpu_scan_ioapic

[PATCH v3 2/4] KVM: x86: Add KVM exit for IOAPIC EOIs

2015-06-02 Thread Steve Rutherford
Adds KVM_EXIT_IOAPIC_EOI which allows the kernel to EOI
level-triggered IOAPIC interrupts.

Uses a per VCPU exit bitmap to decide whether or not the IOAPIC needs
to be informed (which is identical to the EOI_EXIT_BITMAP field used
by modern x86 processors, but can also be used to elide kvm IOAPIC EOI
exits on older processors).

[Note: A prototype using ResampleFDs found that decoupling the EOI
from the VCPU's thread made it possible for the VCPU to not see a
recent EOI after reentering the guest. This does not match real
hardware.]

Compile tested for Intel x86.

Signed-off-by: Steve Rutherford srutherf...@google.com
---
 Documentation/virtual/kvm/api.txt | 10 ++
 arch/x86/include/asm/kvm_host.h   |  3 +++
 arch/x86/kvm/lapic.c  |  9 +
 arch/x86/kvm/x86.c| 11 +++
 include/linux/kvm_host.h  |  1 +
 include/uapi/linux/kvm.h  |  5 +
 6 files changed, 39 insertions(+)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index 9a43d42..6ab2a3f7 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3271,6 +3271,16 @@ Valid values for 'type' are:
 */
__u64 kvm_valid_regs;
__u64 kvm_dirty_regs;
+
+   /* KVM_EXIT_IOAPIC_EOI */
+struct {
+  __u8 vector;
+} eoi;
+
+Indicates that an eoi of a level triggered IOAPIC interrupt on vector has
+occurred, which should be handled by the userspace IOAPIC. Triggers when
+the Irqchip has been split between userspace and the kernel.
+
union {
struct kvm_sync_regs regs;
char padding[1024];
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index af3225a..2778d36 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -540,6 +540,9 @@ struct kvm_vcpu_arch {
struct {
bool pv_unhalted;
} pv;
+
+   u64 eoi_exit_bitmaps[4];
+   int pending_ioapic_eoi;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 92f4c98..28eb946 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -869,6 +869,15 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct 
kvm_vcpu *vcpu2)
 
 static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
 {
+   if (irqchip_split(apic->vcpu->kvm)) {
+   if (test_bit(vector,
+(void *) apic->vcpu->arch.eoi_exit_bitmaps)) {
+   apic->vcpu->arch.pending_ioapic_eoi = vector;
+   kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu);
+   }
+   return;
+   }
+
if (kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
int trigger_mode;
if (apic_test_vector(vector, apic->regs + APIC_TMR))
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 19c8980..5e01810 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6392,6 +6392,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_handle_pmu_event(vcpu);
if (kvm_check_request(KVM_REQ_PMI, vcpu))
kvm_deliver_pmi(vcpu);
+   if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) {
+   BUG_ON(vcpu->arch.pending_ioapic_eoi > 255);
+   if (test_bit(vcpu->arch.pending_ioapic_eoi,
+(void *) vcpu->arch.eoi_exit_bitmaps)) {
+   vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI;
+   vcpu->run->eoi.vector =
+   vcpu->arch.pending_ioapic_eoi;
+   r = 0;
+   goto out;
+   }
+   }
if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
vcpu_scan_ioapic(vcpu);
if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7e2b41a..c6df36f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -134,6 +134,7 @@ static inline bool is_error_page(struct page *page)
 #define KVM_REQ_ENABLE_IBS23
 #define KVM_REQ_DISABLE_IBS   24
 #define KVM_REQ_APIC_PAGE_RELOAD  25
+#define KVM_REQ_IOAPIC_EOI_EXIT   26
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID0
 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID   1
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 1e6f6c3..826a08d 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -183,6 +183,7 @@ struct kvm_s390_skeys {
 #define KVM_EXIT_EPR  23
 #define KVM_EXIT_SYSTEM_EVENT 24
 #define KVM_EXIT_S390_STSI25
+#define KVM_EXIT_IOAPIC_EOI   26
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -329,6 +330,10

[PATCH v3 4/4] KVM: x86: Add support for local interrupt requests from userspace

2015-06-02 Thread Steve Rutherford
In order to enable userspace PIC support, the userspace PIC needs to
be able to inject local interrupt requests.

This adds the ioctl KVM_REQUEST_PIC_INJECTION and kvm exit
KVM_EXIT_GET_EXTINT.

The vm ioctl KVM_REQUEST_PIC_INJECTION makes a KVM_REQ_EVENT request
on the BSP, which causes the BSP to exit to userspace to fetch the
vector of the underlying external interrupt, which the BSP then
injects into the guest. This matches the PIC spec, and is necessary to
boot Windows.

Compiles for x86.

Update: Boots Windows and passes the KVM Unit Tests.

Signed-off-by: Steve Rutherford srutherf...@google.com
---
 Documentation/virtual/kvm/api.txt |  9 ++
 arch/x86/include/asm/kvm_host.h   |  2 ++
 arch/x86/kvm/irq.c| 22 +--
 arch/x86/kvm/lapic.c  |  7 +
 arch/x86/kvm/lapic.h  |  2 ++
 arch/x86/kvm/x86.c| 59 +--
 include/uapi/linux/kvm.h  |  7 +
 7 files changed, 103 insertions(+), 5 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index 6ab2a3f7..b5d90cb 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2979,6 +2979,15 @@ len must be a multiple of sizeof(struct kvm_s390_irq). 
It must be > 0
 and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq),
 which is the maximum number of possibly pending cpu-local interrupts.
 
+4.96 KVM_REQUEST_PIC_INJECTION
+
+Capability: KVM_CAP_SPLIT_IRQCHIP
+Type: VM ioctl
+Parameters: none
+Returns: 0 on success, -1 on error.
+
+Informs the kernel that userspace has a pending external interrupt.
+
 
 5. The kvm_run structure
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4f439ff..0e8b0fc 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -543,6 +543,8 @@ struct kvm_vcpu_arch {
 
u64 eoi_exit_bitmaps[4];
int pending_ioapic_eoi;
+   bool userspace_extint_available;
+   int pending_external_vector;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 706e47a..1270b2a 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -38,12 +38,25 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
 
 /*
+ * check if there is a pending userspace external interrupt
+ */
+static int pending_userspace_extint(struct kvm_vcpu *v)
+{
+   return v->arch.userspace_extint_available ||
+  v->arch.pending_external_vector != -1;
+}
+
+/*
  * check if there is pending interrupt from
  * non-APIC source without intack.
  */
 static int kvm_cpu_has_extint(struct kvm_vcpu *v)
 {
-   if (kvm_apic_accept_pic_intr(v))
+   u8 accept = kvm_apic_accept_pic_intr(v);
+
+   if (accept && irqchip_split(v->kvm))
+   return pending_userspace_extint(v);
+   else if (accept)
return pic_irqchip(v->kvm)->output; /* PIC */
else
return 0;
@@ -91,7 +104,12 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
  */
 static int kvm_cpu_get_extint(struct kvm_vcpu *v)
 {
-   if (kvm_cpu_has_extint(v))
+   if (irqchip_split(v->kvm) && kvm_cpu_has_extint(v)) {
+   int vector = v->arch.pending_external_vector;
+
+   v->arch.pending_external_vector = -1;
+   return vector;
+   } else if (kvm_cpu_has_extint(v))
return kvm_pic_read_irq(v->kvm); /* PIC */
return -1;
 }
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 766d297..012b56ee 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2094,3 +2094,10 @@ void kvm_lapic_init(void)
jump_label_rate_limit(apic_hw_disabled, HZ);
jump_label_rate_limit(apic_sw_disabled, HZ);
 }
+
+void kvm_request_pic_injection(struct kvm_vcpu *vcpu)
+{
+   vcpu->arch.userspace_extint_available = true;
+   kvm_make_request(KVM_REQ_EVENT, vcpu);
+   kvm_vcpu_kick(vcpu);
+}
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 71b150c..7831e4d 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -63,6 +63,8 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct 
kvm_lapic_irq *irq,
unsigned long *dest_map);
 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
 
+void kvm_request_pic_injection(struct kvm_vcpu *vcpu);
+
 bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map);
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 35d13d4..40e7509 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -65,6 +65,8 @@
 #include <asm/pvclock.h>
 #include <asm/div64.h>
 
+#define GET_VECTOR_FROM_USERSPACE 1
+
 #define MAX_IO_MSRS 256
 #define KVM_MAX_MCE_BANKS 32
 #define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)
@@ -4217,6 +4219,30 @@ long

Re: [RFC PATCH 2/4] KVM: x86: Add KVM exit for IOAPIC EOIs

2015-05-28 Thread Steve Rutherford
On Wed, May 27, 2015 at 08:32:04AM +0300, Avi Kivity wrote:
 On 05/27/2015 05:06 AM, Steve Rutherford wrote:
 On Sun, May 24, 2015 at 07:46:03PM +0300, Avi Kivity wrote:
 On 05/13/2015 04:47 AM, Steve Rutherford wrote:
 Adds KVM_EXIT_IOAPIC_EOI which passes the interrupt vector up to
 userspace.
 
 Uses a per VCPU exit bitmap to decide whether or not the IOAPIC needs
 to be informed (which is identical to the EOI_EXIT_BITMAP field used
 by modern x86 processors, but can also be used to elide kvm IOAPIC EOI
 exits on older processors).
 
 [Note: A prototype using ResampleFDs found that decoupling the EOI
 from the VCPU's thread made it possible for the VCPU to not see a
 recent EOI after reentering the guest. This does not match real
 hardware.]
 
 Compile tested for Intel x86.
 
 Signed-off-by: Steve Rutherford srutherf...@google.com
 ---
   Documentation/virtual/kvm/api.txt | 10 ++
   arch/x86/include/asm/kvm_host.h   |  3 +++
   arch/x86/kvm/lapic.c  |  9 +
   arch/x86/kvm/x86.c| 11 +++
   include/linux/kvm_host.h  |  1 +
   include/uapi/linux/kvm.h  |  5 +
   6 files changed, 39 insertions(+)
 
 diff --git a/Documentation/virtual/kvm/api.txt 
 b/Documentation/virtual/kvm/api.txt
 index 0744b4e..dd92996 100644
 --- a/Documentation/virtual/kvm/api.txt
 +++ b/Documentation/virtual/kvm/api.txt
 @@ -3285,6 +3285,16 @@ Valid values for 'type' are:
 */
__u64 kvm_valid_regs;
__u64 kvm_dirty_regs;
 +
 +  /* KVM_EXIT_IOAPIC_EOI */
 +struct {
 + __u8 vector;
 +} eoi;
 +
 +Indicates that an eoi of a level triggered IOAPIC interrupt on vector has
 +occurred, which should be handled by the userspace IOAPIC. Triggers when
 +the Irqchip has been split between userspace and the kernel.
 +
 The ioapic is a global resource, so it doesn't make sense for
 information about it to be returned in a per-vcpu structure
 EOI exits are a per-vcpu behavior, so this doesn't seem all that strange.
 
 (or to block the vcpu while it is being processed).
 Blocking doesn't feel clean, but doesn't seem all that bad, given
 that these operations are relatively rare on modern configurations.
 
 Agree, maybe the realtime people have an interest here.
 
 The way I'd model it is to emulate the APIC bus that connects local
 APICs and the IOAPIC, using a socket pair.  When the user-space
 ioapic wants to inject an interrupt, it sends a message to the local
 APICs which then inject it, and when it's ack'ed the EOI is sent
 back on the same bus.
 Although I'm not certain about this, it sounds to me like this would
 require a kernel thread to be waiting (in some way) on this socket, which
 seems rather heavy handed.
 
 It's been a while since I did kernel programming, but I think you
 can queue a callback to be called when an I/O is ready, and not
 require a thread.  IIRC we do that with irqfd to cause an interrupt
 to be injected.
 

This should be possible, but it's going to add a ton of complexity, and I don't 
really see any compelling benefits. If there is a compelling reason to switch 
to a socket based interface, I'm definitely willing to refactor.

Steve
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH 2/4] KVM: x86: Add KVM exit for IOAPIC EOIs

2015-05-26 Thread Steve Rutherford
On Sun, May 24, 2015 at 07:46:03PM +0300, Avi Kivity wrote:
 On 05/13/2015 04:47 AM, Steve Rutherford wrote:
 Adds KVM_EXIT_IOAPIC_EOI which passes the interrupt vector up to
 userspace.
 
 Uses a per VCPU exit bitmap to decide whether or not the IOAPIC needs
 to be informed (which is identical to the EOI_EXIT_BITMAP field used
 by modern x86 processors, but can also be used to elide kvm IOAPIC EOI
 exits on older processors).
 
 [Note: A prototype using ResampleFDs found that decoupling the EOI
 from the VCPU's thread made it possible for the VCPU to not see a
 recent EOI after reentering the guest. This does not match real
 hardware.]
 
 Compile tested for Intel x86.
 
 Signed-off-by: Steve Rutherford srutherf...@google.com
 ---
   Documentation/virtual/kvm/api.txt | 10 ++
   arch/x86/include/asm/kvm_host.h   |  3 +++
   arch/x86/kvm/lapic.c  |  9 +
   arch/x86/kvm/x86.c| 11 +++
   include/linux/kvm_host.h  |  1 +
   include/uapi/linux/kvm.h  |  5 +
   6 files changed, 39 insertions(+)
 
 diff --git a/Documentation/virtual/kvm/api.txt 
 b/Documentation/virtual/kvm/api.txt
 index 0744b4e..dd92996 100644
 --- a/Documentation/virtual/kvm/api.txt
 +++ b/Documentation/virtual/kvm/api.txt
 @@ -3285,6 +3285,16 @@ Valid values for 'type' are:
   */
  __u64 kvm_valid_regs;
  __u64 kvm_dirty_regs;
 +
 +/* KVM_EXIT_IOAPIC_EOI */
 +struct {
 +   __u8 vector;
 +} eoi;
 +
 +Indicates that an eoi of a level triggered IOAPIC interrupt on vector has
 +occurred, which should be handled by the userspace IOAPIC. Triggers when
 +the Irqchip has been split between userspace and the kernel.
 +
 
 The ioapic is a global resource, so it doesn't make sense for
 information about it to be returned in a per-vcpu structure
EOI exits are a per-vcpu behavior, so this doesn't seem all that strange.

 (or to block the vcpu while it is being processed).

Blocking doesn't feel clean, but doesn't seem all that bad, given
that these operations are relatively rare on modern configurations.

 
 The way I'd model it is to emulate the APIC bus that connects local
 APICs and the IOAPIC, using a socket pair.  When the user-space
 ioapic wants to inject an interrupt, it sends a message to the local
 APICs which then inject it, and when it's ack'ed the EOI is sent
 back on the same bus.
Although I'm not certain about this, it sounds to me like this would
require a kernel thread to be waiting (in some way) on this socket, which
seems rather heavy handed.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH 3/4] KVM: x86: Add EOI exit bitmap inference

2015-05-12 Thread Steve Rutherford
In order to support a userspace IOAPIC interacting with an in kernel
APIC, the EOI exit bitmaps need to be configurable.

If the IOAPIC is in userspace (i.e. the irqchip has been split), the
EOI exit bitmaps will be set whenever the GSI Routes are configured.
In particular, for the low 24 MSI routes, the EOI Exit bit
corresponding to the destination vector will be set for the
destination VCPU.

The intention is for the userspace IOAPIC to use MSI routes [0,23] to
inject interrupts into the guest.

This is a slight abuse of the notion of an MSI Route, given that MSIs
classically bypass the IOAPIC. It might be worthwhile to add an
additional route type to improve clarity.

Compile tested for Intel x86.

Signed-off-by: Steve Rutherford srutherf...@google.com
---
 arch/x86/kvm/ioapic.c| 11 +++
 arch/x86/kvm/ioapic.h|  1 +
 arch/x86/kvm/lapic.c |  2 ++
 arch/x86/kvm/x86.c   | 13 +++--
 include/linux/kvm_host.h |  4 
 virt/kvm/irqchip.c   | 32 
 6 files changed, 61 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 856f791..3323c86 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -672,3 +672,14 @@ int kvm_set_ioapic(struct kvm *kvm, struct 
kvm_ioapic_state *state)
spin_unlock(&ioapic->lock);
return 0;
 }
+
+void kvm_vcpu_request_scan_userspace_ioapic(struct kvm *kvm)
+{
+   struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+
+   if (ioapic)
+   return;
+   if (!lapic_in_kernel(kvm))
+   return;
+   kvm_make_scan_ioapic_request(kvm);
+}
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index ca0b0b4..b7af71b 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -123,4 +123,5 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state 
*state);
 void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
u32 *tmr);
 
+void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
 #endif
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 42fada6f..7533b87 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -211,6 +211,8 @@ out:
 
if (!irqchip_split(kvm))
kvm_vcpu_request_scan_ioapic(kvm);
+   else
+   kvm_vcpu_request_scan_userspace_ioapic(kvm);
 }
 
 static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cc27c35..6127fe7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6335,8 +6335,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
goto out;
}
}
-   if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
-   vcpu_scan_ioapic(vcpu);
+   if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu)) {
+   if (irqchip_split(vcpu->kvm)) {
+   memset(vcpu->arch.eoi_exit_bitmaps, 0, 32);
+   kvm_scan_ioapic_routes(
+   vcpu, vcpu->arch.eoi_exit_bitmaps);
+   kvm_x86_ops->load_eoi_exitmap(
+   vcpu, vcpu->arch.eoi_exit_bitmaps);
+
+   } else
+   vcpu_scan_ioapic(vcpu);
+   }
if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
kvm_vcpu_reload_apic_access_page(vcpu);
}
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index cef20ad..678215a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -438,10 +438,14 @@ void vcpu_put(struct kvm_vcpu *vcpu);
 
 #ifdef __KVM_HAVE_IOAPIC
 void kvm_vcpu_request_scan_ioapic(struct kvm *kvm);
+void kvm_vcpu_request_scan_userspace_ioapic(struct kvm *kvm);
 #else
 static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
 {
 }
+static inline void kvm_vcpu_request_scan_userspace_ioapic(struct kvm *kvm)
+{
+}
 #endif
 
 #ifdef CONFIG_HAVE_KVM_IRQFD
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 8aaceed..8a253aa 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -205,6 +205,8 @@ int kvm_set_irq_routing(struct kvm *kvm,
 
synchronize_srcu_expedited(&kvm->irq_srcu);
 
+   kvm_vcpu_request_scan_userspace_ioapic(kvm);
+
new = old;
r = 0;
 
@@ -212,3 +214,33 @@ out:
kfree(new);
return r;
 }
+
+void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
+{
+   struct kvm *kvm = vcpu->kvm;
+   struct kvm_kernel_irq_routing_entry *entry;
+   struct kvm_irq_routing_table *table;
+   u32 i, nr_rt_entries;
+
+   mutex_lock(&kvm->irq_lock);
+   table = kvm->irq_routing;
+   nr_rt_entries = min_t(u32, table->nr_rt_entries, IOAPIC_NUM_PINS);
+   for (i = 0; i  nr_rt_entries; ++i