[PATCH v2 0/2] Add vector-hashing support for lowest-priority interrupts delivery

2015-12-15 Thread Feng Wu
This series adds vector-hashing support for lowest-priority interrupt
delivery. As an example, modern Intel CPUs in server platforms can use
this method to handle lowest-priority interrupts.

v2:
- Add vector-hashing support for non-vt-d PI case
- Fix some bugs Radim pointed out in v1
- Use a module parameter to control the vector-hashing mechanism

Feng Wu (2):
  KVM: x86: Use vector-hashing to deliver lowest-priority interrupts
  KVM: x86: Add lowest-priority support for vt-d posted-interrupts

 arch/x86/kvm/irq_comm.c |  27 +--
 arch/x86/kvm/lapic.c| 124 
 arch/x86/kvm/lapic.h|   4 ++
 arch/x86/kvm/vmx.c  |  12 -
 arch/x86/kvm/x86.c  |   9 
 arch/x86/kvm/x86.h  |   1 +
 6 files changed, 160 insertions(+), 17 deletions(-)

-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 2/2] KVM: x86: Add lowest-priority support for vt-d posted-interrupts

2015-12-15 Thread Feng Wu
Use vector-hashing to deliver lowest-priority interrupts for
VT-d posted-interrupts.

Signed-off-by: Feng Wu <feng...@intel.com>
---
 arch/x86/kvm/lapic.c | 67 
 arch/x86/kvm/lapic.h |  2 ++
 arch/x86/kvm/vmx.c   | 12 --
 3 files changed, 79 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index e29001f..d4f2c8f 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -854,6 +854,73 @@ out:
 }
 
 /*
+ * This routine handles lowest-priority interrupts using vector-hashing
+ * mechanism. As an example, modern Intel CPUs use this method to handle
+ * lowest-priority interrupts.
+ *
+ * Here is the details about the vector-hashing mechanism:
+ * 1. For lowest-priority interrupts, store all the possible destination
+ *vCPUs in an array.
+ * 2. Use "guest vector % max number of destination vCPUs" to find the right
+ *destination vCPU in the array for the lowest-priority interrupt.
+ */
+struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
+ struct kvm_lapic_irq *irq)
+{
+   struct kvm_apic_map *map;
+   struct kvm_vcpu *vcpu = NULL;
+
+   if (irq->shorthand)
+   return NULL;
+
+   rcu_read_lock();
+   map = rcu_dereference(kvm->arch.apic_map);
+
+   if (!map)
+   goto out;
+
+   if ((irq->dest_mode != APIC_DEST_PHYSICAL) &&
+   kvm_lowest_prio_delivery(irq)) {
+   u16 cid;
+   int i, idx = 0;
+   unsigned long bitmap = 1;
+   unsigned int dest_vcpus = 0;
+   struct kvm_lapic **dst = NULL;
+
+
+   if (!kvm_apic_logical_map_valid(map))
+   goto out;
+
+   apic_logical_id(map, irq->dest_id, , (u16 *));
+
+   if (cid >= ARRAY_SIZE(map->logical_map))
+   goto out;
+
+   dst = map->logical_map[cid];
+
+   for_each_set_bit(i, , 16) {
+   if (!dst[i] && !kvm_lapic_enabled(dst[i]->vcpu)) {
+   clear_bit(i, );
+   continue;
+   }
+   }
+
+   dest_vcpus = hweight16(bitmap);
+
+   if (dest_vcpus != 0) {
+   idx = kvm_vector_2_index(irq->vector, dest_vcpus,
+, 16);
+   vcpu = dst[idx-1]->vcpu;
+   }
+   }
+
+out:
+   rcu_read_unlock();
+   return vcpu;
+}
+EXPORT_SYMBOL_GPL(kvm_intr_vector_hashing_dest);
+
+/*
  * Add a pending IRQ into lapic.
  * Return 1 if successfully added and 0 if discarded.
  */
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 6890ef0..52bffce 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -172,4 +172,6 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct 
kvm_lapic_irq *irq,
struct kvm_vcpu **dest_vcpu);
 int kvm_vector_2_index(u32 vector, u32 dest_vcpus,
   const unsigned long *bitmap, u32 bitmap_size);
+struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
+ struct kvm_lapic_irq *irq);
 #endif
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5eb56ed..3f89189 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -10702,8 +10702,16 @@ static int vmx_update_pi_irte(struct kvm *kvm, 
unsigned int host_irq,
 */
 
kvm_set_msi_irq(e, );
-   if (!kvm_intr_is_single_vcpu(kvm, , ))
-   continue;
+
+   if (!kvm_intr_is_single_vcpu(kvm, , )) {
+   if (!kvm_vector_hashing_enabled() ||
+   irq.delivery_mode != APIC_DM_LOWEST)
+   continue;
+
+   vcpu = kvm_intr_vector_hashing_dest(kvm, );
+   if (!vcpu)
+   continue;
+   }
 
vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
vcpu_info.vector = irq.vector;
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 1/2] KVM: x86: Use vector-hashing to deliver lowest-priority interrupts

2015-12-15 Thread Feng Wu
Use vector-hashing to deliver lowest-priority interrupts. As an
example, modern Intel CPUs in server platforms use this method to
handle lowest-priority interrupts.

Signed-off-by: Feng Wu <feng...@intel.com>
---
 arch/x86/kvm/irq_comm.c | 27 ++-
 arch/x86/kvm/lapic.c| 57 -
 arch/x86/kvm/lapic.h|  2 ++
 arch/x86/kvm/x86.c  |  9 
 arch/x86/kvm/x86.h  |  1 +
 5 files changed, 81 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 84b96d3..c8c5f61 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -32,6 +32,7 @@
 #include "ioapic.h"
 
 #include "lapic.h"
+#include "x86.h"
 
 static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
   struct kvm *kvm, int irq_source_id, int level,
@@ -53,8 +54,10 @@ static int kvm_set_ioapic_irq(struct 
kvm_kernel_irq_routing_entry *e,
 int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
struct kvm_lapic_irq *irq, unsigned long *dest_map)
 {
-   int i, r = -1;
+   int i, r = -1, idx = 0;
struct kvm_vcpu *vcpu, *lowest = NULL;
+   unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+   unsigned int dest_vcpus = 0;
 
if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
kvm_lowest_prio_delivery(irq)) {
@@ -65,6 +68,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct 
kvm_lapic *src,
if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, , dest_map))
return r;
 
+   memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap));
+
kvm_for_each_vcpu(i, vcpu, kvm) {
if (!kvm_apic_present(vcpu))
continue;
@@ -78,13 +83,25 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct 
kvm_lapic *src,
r = 0;
r += kvm_apic_set_irq(vcpu, irq, dest_map);
} else if (kvm_lapic_enabled(vcpu)) {
-   if (!lowest)
-   lowest = vcpu;
-   else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
-   lowest = vcpu;
+   if (!kvm_vector_hashing_enabled()) {
+   if (!lowest)
+   lowest = vcpu;
+   else if (kvm_apic_compare_prio(vcpu, lowest) < 
0)
+   lowest = vcpu;
+   } else {
+   __set_bit(vcpu->vcpu_id, dest_vcpu_bitmap);
+   dest_vcpus++;
+   }
}
}
 
+   if (dest_vcpus != 0) {
+   idx = kvm_vector_2_index(irq->vector, dest_vcpus,
+dest_vcpu_bitmap, KVM_MAX_VCPUS);
+
+   lowest = kvm_get_vcpu(kvm, idx - 1);
+   }
+
if (lowest)
r = kvm_apic_set_irq(lowest, irq, dest_map);
 
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index ecd4ea1..e29001f 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -678,6 +678,22 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct 
kvm_lapic *source,
}
 }
 
+int kvm_vector_2_index(u32 vector, u32 dest_vcpus,
+  const unsigned long *bitmap, u32 bitmap_size)
+{
+   u32 mod;
+   int i, idx = 0;
+
+   mod = vector % dest_vcpus;
+
+   for (i = 0; i <= mod; i++) {
+   idx = find_next_bit(bitmap, bitmap_size, idx) + 1;
+   BUG_ON(idx > bitmap_size);
+   }
+
+   return idx;
+}
+
 bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map)
 {
@@ -731,17 +747,38 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, 
struct kvm_lapic *src,
dst = map->logical_map[cid];
 
if (kvm_lowest_prio_delivery(irq)) {
-   int l = -1;
-   for_each_set_bit(i, , 16) {
-   if (!dst[i])
-   continue;
-   if (l < 0)
-   l = i;
-   else if (kvm_apic_compare_prio(dst[i]->vcpu, 
dst[l]->vcpu) < 0)
-   l = i;
+   if (!kvm_vector_hashing_enabled()) {
+   int l = -1;
+   for_each_set_bit(i, , 16) {
+   if (!dst[i])
+   continue;
+   if (l < 0)
+   l = i;
+

[PATCH] KVM: x86: Add lowest-priority support for vt-d posted-interrupts

2015-11-08 Thread Feng Wu
Use vector-hashing to handle lowest-priority interrupts for
posted-interrupts. As an example, modern Intel CPUs use this
method to handle lowest-priority interrupts.

Signed-off-by: Feng Wu <feng...@intel.com>
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/irq_comm.c | 52 +
 arch/x86/kvm/lapic.c| 57 +
 arch/x86/kvm/lapic.h|  2 ++
 arch/x86/kvm/vmx.c  | 14 --
 5 files changed, 125 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9265196..e225106 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1258,6 +1258,8 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
 
 bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
 struct kvm_vcpu **dest_vcpu);
+struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
+ struct kvm_lapic_irq *irq);
 
 void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
 struct kvm_lapic_irq *irq);
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 84b96d3..8156e45 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -266,6 +266,58 @@ out:
return r;
 }
 
+/*
+ * This routine handles lowest-priority interrupts using vector-hashing
+ * mechanism. As an example, modern Intel CPUs use this method to handle
+ * lowest-priority interrupts.
+ *
+ * Here is the details about the vector-hashing mechanism:
+ * 1. For lowest-priority interrupts, store all the possible destination
+ *vCPUs in an array.
+ * 2. Use "guest vector % max number of destination vCPUs" to find the right
+ *destination vCPU in the array for the lowest-priority interrupt.
+ */
+struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
+ struct kvm_lapic_irq *irq)
+
+{
+   unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+   unsigned int dest_vcpus = 0;
+   struct kvm_vcpu *vcpu;
+   unsigned int i, mod, idx = 0;
+
+   vcpu = kvm_intr_vector_hashing_dest_fast(kvm, irq);
+   if (vcpu)
+   return vcpu;
+
+   memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap));
+
+   kvm_for_each_vcpu(i, vcpu, kvm) {
+   if (!kvm_apic_present(vcpu))
+   continue;
+
+   if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand,
+   irq->dest_id, irq->dest_mode))
+   continue;
+
+   __set_bit(vcpu->vcpu_id, dest_vcpu_bitmap);
+   dest_vcpus++;
+   }
+
+   if (dest_vcpus == 0)
+   return NULL;
+
+   mod = irq->vector % dest_vcpus;
+
+   for (i = 0; i <= mod; i++) {
+   idx = find_next_bit(dest_vcpu_bitmap, KVM_MAX_VCPUS, idx) + 1;
+   BUG_ON(idx >= KVM_MAX_VCPUS);
+   }
+
+   return kvm_get_vcpu(kvm, idx - 1);
+}
+EXPORT_SYMBOL_GPL(kvm_intr_vector_hashing_dest);
+
 bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
 struct kvm_vcpu **dest_vcpu)
 {
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index ecd4ea1..4937aa4 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -816,6 +816,63 @@ out:
return ret;
 }
 
+struct kvm_vcpu *kvm_intr_vector_hashing_dest_fast(struct kvm *kvm,
+  struct kvm_lapic_irq *irq)
+{
+   struct kvm_apic_map *map;
+   struct kvm_vcpu *vcpu = NULL;
+
+   if (irq->shorthand)
+   return NULL;
+
+   rcu_read_lock();
+   map = rcu_dereference(kvm->arch.apic_map);
+
+   if (!map)
+   goto out;
+
+   if ((irq->dest_mode != APIC_DEST_PHYSICAL) &&
+   kvm_lowest_prio_delivery(irq)) {
+   u16 cid;
+   int i, idx = 0;
+   unsigned long bitmap = 1;
+   unsigned int mod, dest_vcpus = 0;
+   struct kvm_lapic **dst = NULL;
+
+
+   if (!kvm_apic_logical_map_valid(map))
+   goto out;
+
+   apic_logical_id(map, irq->dest_id, , (u16 *));
+
+   if (cid >= ARRAY_SIZE(map->logical_map))
+   goto out;
+
+   dst = map->logical_map[cid];
+
+   for_each_set_bit(i, , 16) {
+   if (!dst[i])
+   continue;
+
+   dest_vcpus++;
+   }
+
+   mod = irq->vector % dest_vcpus;
+
+   for (i = 0; i <= mod; i++) {
+   idx = find_next_bit(, KVM_MAX_VCPUS, idx) + 1;
+   BUG_ON(idx >= KVM_MAX_VCPUS)

[PATCH] genirq: Move irq_set_vcpu_affinity out of "#ifdef CONFIG_SMP"

2015-10-03 Thread Feng Wu
irq_set_vcpu_affinity() is needed when CONFIG_SMP=n, so move its
definition out of "#ifdef CONFIG_SMP".

Suggested-by: Paolo Bonzini <pbonz...@redhat.com>
Signed-off-by: Feng Wu <feng...@intel.com>
---
 kernel/irq/manage.c | 62 ++---
 1 file changed, 31 insertions(+), 31 deletions(-)

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 1c58655..90b378d 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -258,37 +258,6 @@ int irq_set_affinity_hint(unsigned int irq, const struct 
cpumask *m)
 }
 EXPORT_SYMBOL_GPL(irq_set_affinity_hint);
 
-/**
- * irq_set_vcpu_affinity - Set vcpu affinity for the interrupt
- * @irq: interrupt number to set affinity
- * @vcpu_info: vCPU specific data
- *
- * This function uses the vCPU specific data to set the vCPU
- * affinity for an irq. The vCPU specific data is passed from
- * outside, such as KVM. One example code path is as below:
- * KVM -> IOMMU -> irq_set_vcpu_affinity().
- */
-int irq_set_vcpu_affinity(unsigned int irq, void *vcpu_info)
-{
-   unsigned long flags;
-   struct irq_desc *desc = irq_get_desc_lock(irq, , 0);
-   struct irq_data *data;
-   struct irq_chip *chip;
-   int ret = -ENOSYS;
-
-   if (!desc)
-   return -EINVAL;
-
-   data = irq_desc_get_irq_data(desc);
-   chip = irq_data_get_irq_chip(data);
-   if (chip && chip->irq_set_vcpu_affinity)
-   ret = chip->irq_set_vcpu_affinity(data, vcpu_info);
-   irq_put_desc_unlock(desc, flags);
-
-   return ret;
-}
-EXPORT_SYMBOL_GPL(irq_set_vcpu_affinity);
-
 static void irq_affinity_notify(struct work_struct *work)
 {
struct irq_affinity_notify *notify =
@@ -424,6 +393,37 @@ setup_affinity(struct irq_desc *desc, struct cpumask *mask)
 }
 #endif
 
+/**
+ * irq_set_vcpu_affinity - Set vcpu affinity for the interrupt
+ * @irq: interrupt number to set affinity
+ * @vcpu_info: vCPU specific data
+ *
+ * This function uses the vCPU specific data to set the vCPU
+ * affinity for an irq. The vCPU specific data is passed from
+ * outside, such as KVM. One example code path is as below:
+ * KVM -> IOMMU -> irq_set_vcpu_affinity().
+ */
+int irq_set_vcpu_affinity(unsigned int irq, void *vcpu_info)
+{
+   unsigned long flags;
+   struct irq_desc *desc = irq_get_desc_lock(irq, , 0);
+   struct irq_data *data;
+   struct irq_chip *chip;
+   int ret = -ENOSYS;
+
+   if (!desc)
+   return -EINVAL;
+
+   data = irq_desc_get_irq_data(desc);
+   chip = irq_data_get_irq_chip(data);
+   if (chip && chip->irq_set_vcpu_affinity)
+   ret = chip->irq_set_vcpu_affinity(data, vcpu_info);
+   irq_put_desc_unlock(desc, flags);
+
+   return ret;
+}
+EXPORT_SYMBOL_GPL(irq_set_vcpu_affinity);
+
 void __disable_irq(struct irq_desc *desc)
 {
if (!desc->depth++)
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Add virt directory to the top Makefile

2015-09-22 Thread Feng Wu
We need to build files in virt/lib/, which are now used by
KVM and VFIO, so add virt directory to the top Makefile.

Signed-off-by: Feng Wu <feng...@intel.com>
---
 Makefile  | 8 +---
 arch/x86/kvm/Makefile | 3 ---
 virt/Makefile | 1 +
 3 files changed, 6 insertions(+), 6 deletions(-)
 create mode 100644 virt/Makefile

diff --git a/Makefile b/Makefile
index 35b4c19..d82fceb 100644
--- a/Makefile
+++ b/Makefile
@@ -550,6 +550,7 @@ drivers-y   := drivers/ sound/ firmware/
 net-y  := net/
 libs-y := lib/
 core-y := usr/
+virt-y := virt/
 endif # KBUILD_EXTMOD
 
 ifeq ($(dot-config),1)
@@ -890,10 +891,10 @@ core-y+= kernel/ mm/ fs/ ipc/ security/ 
crypto/ block/
 
 vmlinux-dirs   := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \
 $(core-y) $(core-m) $(drivers-y) $(drivers-m) \
-$(net-y) $(net-m) $(libs-y) $(libs-m)))
+$(net-y) $(net-m) $(libs-y) $(libs-m) $(virt-y)))
 
 vmlinux-alldirs:= $(sort $(vmlinux-dirs) $(patsubst %/,%,$(filter %/, \
-$(init-) $(core-) $(drivers-) $(net-) $(libs-
+$(init-) $(core-) $(drivers-) $(net-) $(libs-) $(virt-
 
 init-y := $(patsubst %/, %/built-in.o, $(init-y))
 core-y := $(patsubst %/, %/built-in.o, $(core-y))
@@ -902,10 +903,11 @@ net-y := $(patsubst %/, %/built-in.o, 
$(net-y))
 libs-y1:= $(patsubst %/, %/lib.a, $(libs-y))
 libs-y2:= $(patsubst %/, %/built-in.o, $(libs-y))
 libs-y := $(libs-y1) $(libs-y2)
+virt-y := $(patsubst %/, %/built-in.o, $(virt-y))
 
 # Externally visible symbols (used by link-vmlinux.sh)
 export KBUILD_VMLINUX_INIT := $(head-y) $(init-y)
-export KBUILD_VMLINUX_MAIN := $(core-y) $(libs-y) $(drivers-y) $(net-y)
+export KBUILD_VMLINUX_MAIN := $(core-y) $(libs-y) $(drivers-y) $(net-y) 
$(virt-y)
 export KBUILD_LDS  := arch/$(SRCARCH)/kernel/vmlinux.lds
 export LDFLAGS_vmlinux
 # used by scripts/pacmage/Makefile
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 05cc2d7..67d215c 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -6,9 +6,6 @@ CFLAGS_svm.o := -I.
 CFLAGS_vmx.o := -I.
 
 KVM := ../../../virt/kvm
-LIB := ../../../virt/lib
-
-obj-$(CONFIG_IRQ_BYPASS_MANAGER)   += $(LIB)/
 
 kvm-y  += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
$(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o
diff --git a/virt/Makefile b/virt/Makefile
new file mode 100644
index 000..335dc0b
--- /dev/null
+++ b/virt/Makefile
@@ -0,0 +1 @@
+obj-y  += ./lib/
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2] Add virt directory to the top Makefile

2015-09-22 Thread Feng Wu
We need to build files in virt/lib/, so add virt directory to
the top Makefile.

Signed-off-by: Feng Wu <feng...@intel.com>
---
v2:
- Remove the explicit 'virt' entry in the KBUILD_ALLDIRS assignment
- Remove './' in virt/Makefile

 Makefile  | 10 ++
 arch/x86/kvm/Makefile |  3 ---
 virt/Makefile |  1 +
 3 files changed, 7 insertions(+), 7 deletions(-)
 create mode 100644 virt/Makefile

diff --git a/Makefile b/Makefile
index 35b4c19..0e60876 100644
--- a/Makefile
+++ b/Makefile
@@ -550,6 +550,7 @@ drivers-y   := drivers/ sound/ firmware/
 net-y  := net/
 libs-y := lib/
 core-y := usr/
+virt-y := virt/
 endif # KBUILD_EXTMOD
 
 ifeq ($(dot-config),1)
@@ -890,10 +891,10 @@ core-y+= kernel/ mm/ fs/ ipc/ security/ 
crypto/ block/
 
 vmlinux-dirs   := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \
 $(core-y) $(core-m) $(drivers-y) $(drivers-m) \
-$(net-y) $(net-m) $(libs-y) $(libs-m)))
+$(net-y) $(net-m) $(libs-y) $(libs-m) $(virt-y)))
 
 vmlinux-alldirs:= $(sort $(vmlinux-dirs) $(patsubst %/,%,$(filter %/, \
-$(init-) $(core-) $(drivers-) $(net-) $(libs-
+$(init-) $(core-) $(drivers-) $(net-) $(libs-) $(virt-
 
 init-y := $(patsubst %/, %/built-in.o, $(init-y))
 core-y := $(patsubst %/, %/built-in.o, $(core-y))
@@ -902,14 +903,15 @@ net-y := $(patsubst %/, %/built-in.o, 
$(net-y))
 libs-y1:= $(patsubst %/, %/lib.a, $(libs-y))
 libs-y2:= $(patsubst %/, %/built-in.o, $(libs-y))
 libs-y := $(libs-y1) $(libs-y2)
+virt-y := $(patsubst %/, %/built-in.o, $(virt-y))
 
 # Externally visible symbols (used by link-vmlinux.sh)
 export KBUILD_VMLINUX_INIT := $(head-y) $(init-y)
-export KBUILD_VMLINUX_MAIN := $(core-y) $(libs-y) $(drivers-y) $(net-y)
+export KBUILD_VMLINUX_MAIN := $(core-y) $(libs-y) $(drivers-y) $(net-y) 
$(virt-y)
 export KBUILD_LDS  := arch/$(SRCARCH)/kernel/vmlinux.lds
 export LDFLAGS_vmlinux
 # used by scripts/pacmage/Makefile
-export KBUILD_ALLDIRS := $(sort $(filter-out arch/%,$(vmlinux-alldirs)) arch 
Documentation include samples scripts tools virt)
+export KBUILD_ALLDIRS := $(sort $(filter-out arch/%,$(vmlinux-alldirs)) arch 
Documentation include samples scripts tools)
 
 vmlinux-deps := $(KBUILD_LDS) $(KBUILD_VMLINUX_INIT) $(KBUILD_VMLINUX_MAIN)
 
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 05cc2d7..67d215c 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -6,9 +6,6 @@ CFLAGS_svm.o := -I.
 CFLAGS_vmx.o := -I.
 
 KVM := ../../../virt/kvm
-LIB := ../../../virt/lib
-
-obj-$(CONFIG_IRQ_BYPASS_MANAGER)   += $(LIB)/
 
 kvm-y  += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
$(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o
diff --git a/virt/Makefile b/virt/Makefile
new file mode 100644
index 000..be78347
--- /dev/null
+++ b/virt/Makefile
@@ -0,0 +1 @@
+obj-y  += lib/
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v9 15/18] KVM: Add an arch specific hooks in 'struct kvm_kernel_irqfd'

2015-09-18 Thread Feng Wu
This patch adds an arch-specific hook, 'arch_update', in
'struct kvm_kernel_irqfd'. On the Intel side, it is used to
update the IRTE when VT-d posted-interrupts are used.

Signed-off-by: Feng Wu <feng...@intel.com>
---
v9:
- Use 'if' instead of "? :" in kvm_arch_update_irqfd_routing()
- coding style

v8:
- Remove callback .arch_update()
- Remove kvm_arch_irqfd_init()
- Call kvm_arch_update_irqfd_routing() instead.

 arch/x86/kvm/x86.c   |  9 +
 include/linux/kvm_host.h |  2 ++
 virt/kvm/eventfd.c   | 20 +++-
 3 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 79dac02..58688aa 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8293,6 +8293,15 @@ void kvm_arch_irq_bypass_del_producer(struct 
irq_bypass_consumer *cons,
   " fails: %d\n", irqfd->consumer.token, ret);
 }
 
+int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
+  uint32_t guest_irq, bool set)
+{
+   if (!kvm_x86_ops->update_pi_irte)
+   return -EINVAL;
+
+   return kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set);
+}
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5f183fb..feba1fb 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1174,6 +1174,8 @@ void kvm_arch_irq_bypass_del_producer(struct 
irq_bypass_consumer *,
   struct irq_bypass_producer *);
 void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *);
 void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *);
+int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
+ uint32_t guest_irq, bool set);
 #endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */
 #endif
 
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index c0a56a1..94306a3 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -266,6 +266,13 @@ void __attribute__((weak)) kvm_arch_irq_bypass_start(
struct irq_bypass_consumer *cons)
 {
 }
+
+int  __attribute__((weak)) kvm_arch_update_irqfd_routing(
+   struct kvm *kvm, unsigned int host_irq,
+   uint32_t guest_irq, bool set)
+{
+   return 0;
+}
 #endif
 
 static int
@@ -582,13 +589,24 @@ kvm_irqfd_release(struct kvm *kvm)
  */
 void kvm_irq_routing_update(struct kvm *kvm)
 {
+   int ret;
struct kvm_kernel_irqfd *irqfd;
 
spin_lock_irq(>irqfds.lock);
 
-   list_for_each_entry(irqfd, >irqfds.items, list)
+   list_for_each_entry(irqfd, >irqfds.items, list) {
irqfd_update(kvm, irqfd);
 
+#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+   if (irqfd->producer) {
+   ret = kvm_arch_update_irqfd_routing(
+   irqfd->kvm, irqfd->producer->irq,
+   irqfd->gsi, 1);
+   WARN_ON(ret);
+   }
+#endif
+   }
+
spin_unlock_irq(>irqfds.lock);
 }
 
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v9 09/18] KVM: Define a new interface kvm_intr_is_single_vcpu()

2015-09-18 Thread Feng Wu
This patch defines a new interface, kvm_intr_is_single_vcpu(),
which returns whether the interrupt is for a single CPU or not.

It is used by VT-d PI, since for now we only support single-CPU
interrupts. For lowest-priority interrupts, if the user configures
them via /proc/irq or uses irqbalance to make them single-CPU, we
can use PI to deliver the interrupts. Full functionality
of lowest-priority support will be added later.

Signed-off-by: Feng Wu <feng...@intel.com>
---
v9:
- Move kvm_intr_is_single_vcpu_fast() to lapic.c
- Remove incorrect WARN_ON_ONCE()

v8:
- Some optimizations in kvm_intr_is_single_vcpu().
- Expose kvm_intr_is_single_vcpu() so we can use it in vmx code.
- Add kvm_intr_is_single_vcpu_fast() as the fast path to find
  the target vCPU for the single-destination interrupt

 arch/x86/include/asm/kvm_host.h |  3 +++
 arch/x86/kvm/irq_comm.c | 27 +++
 arch/x86/kvm/lapic.c| 59 +
 arch/x86/kvm/lapic.h|  2 ++
 4 files changed, 91 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 49ec903..af11bca 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1204,4 +1204,7 @@ int __x86_set_memory_region(struct kvm *kvm,
 int x86_set_memory_region(struct kvm *kvm,
  const struct kvm_userspace_memory_region *mem);
 
+bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
+struct kvm_vcpu **dest_vcpu);
+
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 9efff9e..f86a0da 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -297,6 +297,33 @@ out:
return r;
 }
 
+bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
+struct kvm_vcpu **dest_vcpu)
+{
+   int i, r = 0;
+   struct kvm_vcpu *vcpu;
+
+   if (kvm_intr_is_single_vcpu_fast(kvm, irq, dest_vcpu))
+   return true;
+
+   kvm_for_each_vcpu(i, vcpu, kvm) {
+   if (!kvm_apic_present(vcpu))
+   continue;
+
+   if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand,
+   irq->dest_id, irq->dest_mode))
+   continue;
+
+   if (++r == 2)
+   return false;
+
+   *dest_vcpu = vcpu;
+   }
+
+   return r == 1;
+}
+EXPORT_SYMBOL_GPL(kvm_intr_is_single_vcpu);
+
 #define IOAPIC_ROUTING_ENTRY(irq) \
{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,  \
  .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } }
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 2a5ca97..3c8fc71 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -764,6 +764,65 @@ out:
return ret;
 }
 
+bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
+   struct kvm_vcpu **dest_vcpu)
+{
+   struct kvm_apic_map *map;
+   bool ret = false;
+   struct kvm_lapic *dst = NULL;
+
+   if (irq->shorthand)
+   return false;
+
+   rcu_read_lock();
+   map = rcu_dereference(kvm->arch.apic_map);
+
+   if (!map)
+   goto out;
+
+   if (irq->dest_mode == APIC_DEST_PHYSICAL) {
+   if (irq->dest_id == 0xFF)
+   goto out;
+
+   if (irq->dest_id >= ARRAY_SIZE(map->phys_map))
+   goto out;
+
+   dst = map->phys_map[irq->dest_id];
+   if (dst && kvm_apic_present(dst->vcpu))
+   *dest_vcpu = dst->vcpu;
+   else
+   goto out;
+   } else {
+   u16 cid;
+   unsigned long bitmap = 1;
+   int i, r = 0;
+
+   if (!kvm_apic_logical_map_valid(map))
+   goto out;
+
+   apic_logical_id(map, irq->dest_id, , (u16 *));
+
+   if (cid >= ARRAY_SIZE(map->logical_map))
+   goto out;
+
+   for_each_set_bit(i, , 16) {
+   dst = map->logical_map[cid][i];
+   if (++r == 2)
+   goto out;
+   }
+
+   if (dst && kvm_apic_present(dst->vcpu))
+   *dest_vcpu = dst->vcpu;
+   else
+   goto out;
+   }
+
+   ret = true;
+out:
+   rcu_read_unlock();
+   return ret;
+}
+
 /*
  * Add a pending IRQ into lapic.
  * Return 1 if successfully added and 0 if discarded.
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 7195274..032fe2d 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -169,4 +169,6 @@ bool kvm_apic_pending_eoi(struct kvm_v

[PATCH v9 13/18] KVM: x86: Update IRTE for posted-interrupts

2015-09-18 Thread Feng Wu
This patch adds the routine to update IRTE for posted-interrupts
when guest changes the interrupt configuration.

Signed-off-by: Feng Wu <feng...@intel.com>
---
v9:
- Check !kvm_arch_has_assigned_device(kvm) first then
  !irq_remapping_cap(IRQ_POSTING_CAP)

v8:
- Move 'kvm_arch_update_pi_irte' to vmx.c as a callback
- Only update the PI irte when VM has assigned devices
- Add a trace point for VT-d posted-interrupts when we update
  or disable it for a specific irq.

 arch/x86/include/asm/kvm_host.h |  3 ++
 arch/x86/kvm/trace.h| 33 
 arch/x86/kvm/vmx.c  | 83 +
 arch/x86/kvm/x86.c  |  2 +
 4 files changed, 121 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index daa6126..8c44286 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -862,6 +862,9 @@ struct kvm_x86_ops {
   gfn_t offset, unsigned long mask);
/* pmu operations of sub-arch */
const struct kvm_pmu_ops *pmu_ops;
+
+   int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
+ uint32_t guest_irq, bool set);
 };
 
 struct kvm_arch_async_pf {
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 4eae7c3..539a9e4 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -974,6 +974,39 @@ TRACE_EVENT(kvm_enter_smm,
  __entry->smbase)
 );
 
+/*
+ * Tracepoint for VT-d posted-interrupts.
+ */
+TRACE_EVENT(kvm_pi_irte_update,
+   TP_PROTO(unsigned int vcpu_id, unsigned int gsi,
+unsigned int gvec, u64 pi_desc_addr, bool set),
+   TP_ARGS(vcpu_id, gsi, gvec, pi_desc_addr, set),
+
+   TP_STRUCT__entry(
+   __field(unsigned int,   vcpu_id )
+   __field(unsigned int,   gsi )
+   __field(unsigned int,   gvec)
+   __field(u64,pi_desc_addr)
+   __field(bool,   set )
+   ),
+
+   TP_fast_assign(
+   __entry->vcpu_id= vcpu_id;
+   __entry->gsi= gsi;
+   __entry->gvec   = gvec;
+   __entry->pi_desc_addr   = pi_desc_addr;
+   __entry->set= set;
+   ),
+
+   TP_printk("VT-d PI is %s for this irq, vcpu %u, gsi: 0x%x, "
+ "gvec: 0x%x, pi_desc_addr: 0x%llx",
+ __entry->set ? "enabled and being updated" : "disabled",
+ __entry->vcpu_id,
+ __entry->gsi,
+ __entry->gvec,
+ __entry->pi_desc_addr)
+);
+
 #endif /* _TRACE_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 316f9bf..11bda72 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -45,6 +45,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "trace.h"
 #include "pmu.h"
@@ -605,6 +606,11 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu 
*vcpu)
return container_of(vcpu, struct vcpu_vmx, vcpu);
 }
 
+struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
+{
+   return &(to_vmx(vcpu)->pi_desc);
+}
+
 #define VMCS12_OFFSET(x) offsetof(struct vmcs12, x)
 #define FIELD(number, name)[number] = VMCS12_OFFSET(name)
 #define FIELD64(number, name)  [number] = VMCS12_OFFSET(name), \
@@ -10344,6 +10350,81 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm 
*kvm,
kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
 }
 
+/*
+ * vmx_update_pi_irte - set IRTE for Posted-Interrupts
+ *
+ * @kvm: kvm
+ * @host_irq: host irq of the interrupt
+ * @guest_irq: gsi of the interrupt
+ * @set: set or unset PI
+ * returns 0 on success, < 0 on failure
+ */
+int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
+  uint32_t guest_irq, bool set)
+{
+   struct kvm_kernel_irq_routing_entry *e;
+   struct kvm_irq_routing_table *irq_rt;
+   struct kvm_lapic_irq irq;
+   struct kvm_vcpu *vcpu;
+   struct vcpu_data vcpu_info;
+   int idx, ret = -EINVAL;
+
+   if (!kvm_arch_has_assigned_device(kvm) ||
+   !irq_remapping_cap(IRQ_POSTING_CAP))
+   return 0;
+
+   idx = srcu_read_lock(>irq_srcu);
+   irq_rt = srcu_dereference(kvm->irq_routing, >irq_srcu);
+   BUG_ON(guest_irq >= irq_rt->nr_rt_entries);
+
+   hlist_for_each_entry(e, _rt->map[guest_irq], link) {
+   if (e->type != KVM_IRQ_ROUTING_MSI)
+   continue;
+   /*
+* VT-d PI cannot support posting multicast/broadcast
+* interrupts to a vCPU, we still use interrupt remapping
+* for these 

[PATCH v9 10/18] KVM: Make struct kvm_irq_routing_table accessible

2015-09-18 Thread Feng Wu
Move struct kvm_irq_routing_table from irqchip.c to kvm_host.h,
so we can use it outside of irqchip.c.

Signed-off-by: Feng Wu <feng...@intel.com>
Reviewed-by: Paolo Bonzini <pbonz...@redhat.com>
---
 include/linux/kvm_host.h | 14 ++
 virt/kvm/irqchip.c   | 10 --
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5ac8d21..5f183fb 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -328,6 +328,20 @@ struct kvm_kernel_irq_routing_entry {
struct hlist_node link;
 };
 
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+
+struct kvm_irq_routing_table {
+   int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
+   u32 nr_rt_entries;
+   /*
+* Array indexed by gsi. Each entry contains list of irq chips
+* the gsi is connected to.
+*/
+   struct hlist_head map[0];
+};
+
+#endif
+
 #ifndef KVM_PRIVATE_MEM_SLOTS
 #define KVM_PRIVATE_MEM_SLOTS 0
 #endif
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 21c1424..2cf45d3 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -31,16 +31,6 @@
 #include 
 #include "irq.h"
 
-struct kvm_irq_routing_table {
-   int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
-   u32 nr_rt_entries;
-   /*
-* Array indexed by gsi. Each entry contains list of irq chips
-* the gsi is connected to.
-*/
-   struct hlist_head map[0];
-};
-
 int kvm_irq_map_gsi(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *entries, int gsi)
 {
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v9 00/18] Add VT-d Posted-Interrupts support - including prerequisite series

2015-09-18 Thread Feng Wu
VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
intervention when guest is running in non-root mode.

You can find the VT-d Posted-Interrupts Spec. in the following URL:
http://www.intel.com/content/www/us/en/intelligent-systems/intel-technology/vt-directed-io-spec.html

v9:
- Include the whole series:
[01/18]: irq bypasser manager
[02/18] - [06/18]: Common non-architecture part for VT-d PI and ARM side 
forwarded irq
[07/18] - [18/18]: VT-d PI part

v8:
refer to the changelog in each patch

v7:
* Define two weak irq bypass callbacks:
  - kvm_arch_irq_bypass_start()
  - kvm_arch_irq_bypass_stop()
* Remove the x86 dummy implementation of the above two functions.
* Print some useful information instead of WARN_ON() when the
  irq bypass consumer unregistration fails.
* Fix an issue when calling pi_pre_block and pi_post_block.

v6:
* Rebase on 4.2.0-rc6
* Rebase on https://lkml.org/lkml/2015/8/6/526 and 
http://www.gossamer-threads.com/lists/linux/kernel/2235623
* Make the add_consumer and del_consumer callbacks static
* Remove pointless INIT_LIST_HEAD to 'vdev->ctx[vector].producer.node)'
* Use dev_info instead of WARN_ON() when irq_bypass_register_producer fails
* Remove optional dummy callbacks for irq producer

v4:
* For lowest-priority interrupt, only support single-CPU destination
interrupts at the current stage, more common lowest priority support
will be added later.
* According to Marcelo's suggestion, when vCPU is blocked, we handle
the posted-interrupts in the HLT emulation path.
* Some small changes (coding style, typo, add some code comments)

v3:
* Adjust the Posted-interrupts Descriptor updating logic when vCPU is
  preempted or blocked.
* KVM_DEV_VFIO_DEVICE_POSTING_IRQ --> KVM_DEV_VFIO_DEVICE_POST_IRQ
* __KVM_HAVE_ARCH_KVM_VFIO_POSTING --> __KVM_HAVE_ARCH_KVM_VFIO_POST
* Add KVM_DEV_VFIO_DEVICE_UNPOST_IRQ attribute for VFIO irq, which
  can be used to change back to remapping mode.
* Fix typo

v2:
* Use VFIO framework to enable this feature, the VFIO part of this series is
  base on Eric's patch "[PATCH v3 0/8] KVM-VFIO IRQ forward control"
* Rebase this patchset on 
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git,
  then revise some irq logic based on the new hierarchy irqdomain patches 
provided
  by Jiang Liu <jiang@linux.intel.com>


*** BLURB HERE ***

Alex Williamson (1):
  virt: IRQ bypass manager

Eric Auger (4):
  KVM: arm/arm64: select IRQ_BYPASS_MANAGER
  KVM: create kvm_irqfd.h
  KVM: introduce kvm_arch functions for IRQ bypass
  KVM: eventfd: add irq bypass consumer management

Feng Wu (13):
  KVM: x86: select IRQ_BYPASS_MANAGER
  KVM: Extend struct pi_desc for VT-d Posted-Interrupts
  KVM: Add some helper functions for Posted-Interrupts
  KVM: Define a new interface kvm_intr_is_single_vcpu()
  KVM: Make struct kvm_irq_routing_table accessible
  KVM: make kvm_set_msi_irq() public
  vfio: Register/unregister irq_bypass_producer
  KVM: x86: Update IRTE for posted-interrupts
  KVM: Implement IRQ bypass consumer callbacks for x86
  KVM: Add an arch specific hooks in 'struct kvm_kernel_irqfd'
  KVM: Update Posted-Interrupts Descriptor when vCPU is preempted
  KVM: Update Posted-Interrupts Descriptor when vCPU is blocked
  iommu/vt-d: Add a command line parameter for VT-d posted-interrupts

 Documentation/kernel-parameters.txt   |   1 +
 Documentation/virtual/kvm/locking.txt |  12 ++
 MAINTAINERS   |   7 +
 arch/arm/kvm/Kconfig  |   2 +
 arch/arm/kvm/Makefile |   1 +
 arch/arm64/kvm/Kconfig|   2 +
 arch/arm64/kvm/Makefile   |   1 +
 arch/x86/include/asm/kvm_host.h   |  24 +++
 arch/x86/kvm/Kconfig  |   3 +
 arch/x86/kvm/Makefile |   3 +
 arch/x86/kvm/irq_comm.c   |  32 ++-
 arch/x86/kvm/lapic.c  |  59 ++
 arch/x86/kvm/lapic.h  |   2 +
 arch/x86/kvm/trace.h  |  33 
 arch/x86/kvm/vmx.c| 361 +-
 arch/x86/kvm/x86.c| 108 +-
 drivers/iommu/irq_remapping.c |  12 +-
 drivers/vfio/pci/Kconfig  |   1 +
 drivers/vfio/pci/vfio_pci_intrs.c |   9 +
 drivers/vfio/pci/vfio_pci_private.h   |   2 +
 include/linux/irqbypass.h |  90 +
 include/linux/kvm_host.h  |  29 +++
 include/linux/kvm_irqfd.h |  71 +++
 virt/kvm/Kconfig  |   3 +
 virt/kvm/eventfd.c| 142 +++--
 virt/kvm/irqchip.c|  10 -
 virt/kvm/kvm_main.c   |   3 +
 virt/lib/Kconfig  |   2 +
 virt/lib/Makefile |   1 +
 virt/lib/irqbypass.c  | 257 
 30 files changed

[PATCH v9 18/18] iommu/vt-d: Add a command line parameter for VT-d posted-interrupts

2015-09-18 Thread Feng Wu
Enable VT-d Posted-Interrupts and add a command line
parameter for it.

Signed-off-by: Feng Wu <feng...@intel.com>
Reviewed-by: Paolo Bonzini <pbonz...@redhat.com>
---
 Documentation/kernel-parameters.txt |  1 +
 drivers/iommu/irq_remapping.c   | 12 
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/Documentation/kernel-parameters.txt 
b/Documentation/kernel-parameters.txt
index 1d6f045..52aca36 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1547,6 +1547,7 @@ bytes respectively. Such letter suffixes can also be 
entirely omitted.
nosid   disable Source ID checking
no_x2apic_optout
BIOS x2APIC opt-out request will be ignored
+   nopost  disable Interrupt Posting
 
iomem=  Disable strict checking of access to MMIO memory
strict  regions from userspace.
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 2d99930..d8c3997 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -22,7 +22,7 @@ int irq_remap_broken;
 int disable_sourceid_checking;
 int no_x2apic_optout;
 
-int disable_irq_post = 1;
+int disable_irq_post = 0;
 
 static int disable_irq_remap;
 static struct irq_remap_ops *remap_ops;
@@ -58,14 +58,18 @@ static __init int setup_irqremap(char *str)
return -EINVAL;
 
while (*str) {
-   if (!strncmp(str, "on", 2))
+   if (!strncmp(str, "on", 2)) {
disable_irq_remap = 0;
-   else if (!strncmp(str, "off", 3))
+   disable_irq_post = 0;
+   } else if (!strncmp(str, "off", 3)) {
disable_irq_remap = 1;
-   else if (!strncmp(str, "nosid", 5))
+   disable_irq_post = 1;
+   } else if (!strncmp(str, "nosid", 5))
disable_sourceid_checking = 1;
else if (!strncmp(str, "no_x2apic_optout", 16))
no_x2apic_optout = 1;
+   else if (!strncmp(str, "nopost", 6))
+   disable_irq_post = 1;
 
str += strcspn(str, ",");
while (*str == ',')
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v9 17/18] KVM: Update Posted-Interrupts Descriptor when vCPU is blocked

2015-09-18 Thread Feng Wu
This patch updates the Posted-Interrupts Descriptor when vCPU
is blocked.

pre-block:
- Add the vCPU to the blocked per-CPU list
- Set 'NV' to POSTED_INTR_WAKEUP_VECTOR

post-block:
- Remove the vCPU from the per-CPU list

Signed-off-by: Feng Wu <feng...@intel.com>
---
v9:
- Add description for blocked_vcpu_on_cpu_lock in 
Documentation/virtual/kvm/locking.txt
- Check !kvm_arch_has_assigned_device(vcpu->kvm) first, then
  !irq_remapping_cap(IRQ_POSTING_CAP)

v8:
- Rename 'pi_pre_block' to 'pre_block'
- Rename 'pi_post_block' to 'post_block'
- Change some comments
- Only add the vCPU to the blocking list when the VM has assigned devices.

 Documentation/virtual/kvm/locking.txt |  12 +++
 arch/x86/include/asm/kvm_host.h   |  13 +++
 arch/x86/kvm/vmx.c| 153 ++
 arch/x86/kvm/x86.c|  53 +---
 include/linux/kvm_host.h  |   3 +
 virt/kvm/kvm_main.c   |   3 +
 6 files changed, 227 insertions(+), 10 deletions(-)

diff --git a/Documentation/virtual/kvm/locking.txt 
b/Documentation/virtual/kvm/locking.txt
index d68af4d..19f94a6 100644
--- a/Documentation/virtual/kvm/locking.txt
+++ b/Documentation/virtual/kvm/locking.txt
@@ -166,3 +166,15 @@ Comment:   The srcu read lock must be held while accessing 
memslots (e.g.
MMIO/PIO address->device structure mapping (kvm->buses).
The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu
if it is needed by multiple functions.
+
+Name:  blocked_vcpu_on_cpu_lock
+Type:  spinlock_t
+Arch:  x86
+Protects:  blocked_vcpu_on_cpu
+Comment:   This is a per-CPU lock and it is used for VT-d 
posted-interrupts.
+   When VT-d posted-interrupts is supported and the VM has assigned
+   devices, we put the blocked vCPU on the list blocked_vcpu_on_cpu
+   protected by blocked_vcpu_on_cpu_lock, when VT-d hardware issues
+   wakeup notification event since external interrupts from the
+   assigned devices happens, we will find the vCPU on the list to
+   wakeup.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0ddd353..304fbb5 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -552,6 +552,8 @@ struct kvm_vcpu_arch {
 */
bool write_fault_to_shadow_pgtable;
 
+   bool halted;
+
/* set at EPT violation at this point */
unsigned long exit_qualification;
 
@@ -864,6 +866,17 @@ struct kvm_x86_ops {
/* pmu operations of sub-arch */
const struct kvm_pmu_ops *pmu_ops;
 
+   /*
+* Architecture specific hooks for vCPU blocking due to
+* HLT instruction.
+* Returns for .pre_block():
+*- 0 means continue to block the vCPU.
+*- 1 means we cannot block the vCPU since some event
+*happens during this period, such as, 'ON' bit in
+*posted-interrupts descriptor is set.
+*/
+   int (*pre_block)(struct kvm_vcpu *vcpu);
+   void (*post_block)(struct kvm_vcpu *vcpu);
int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
  uint32_t guest_irq, bool set);
 };
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 902a67d..9968896 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -879,6 +879,13 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
 static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
 static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
 
+/*
+ * We maintain a per-CPU linked-list of vCPU, so in wakeup_handler() we
+ * can find which vCPU should be woken up.
+ */
+static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
+static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
+
 static unsigned long *vmx_io_bitmap_a;
 static unsigned long *vmx_io_bitmap_b;
 static unsigned long *vmx_msr_bitmap_legacy;
@@ -2985,6 +2992,8 @@ static int hardware_enable(void)
return -EBUSY;
 
INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
+   INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
+   spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
 
/*
 * Now we can enable the vmclear operation in kdump
@@ -6121,6 +6130,25 @@ static void update_ple_window_actual_max(void)
ple_window_grow, INT_MIN);
 }
 
+/*
+ * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
+ */
+static void wakeup_handler(void)
+{
+   struct kvm_vcpu *vcpu;
+   int cpu = smp_processor_id();
+
+   spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+   list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu),
+   blocked_vcpu_list) {
+   struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+   if (pi_test_on(pi_desc) == 1)
+   

[PATCH v9 07/18] KVM: Extend struct pi_desc for VT-d Posted-Interrupts

2015-09-18 Thread Feng Wu
Extend struct pi_desc for VT-d Posted-Interrupts.

Signed-off-by: Feng Wu <feng...@intel.com>
---
 arch/x86/kvm/vmx.c | 20 ++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 83b7b5c..271dd70 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -446,8 +446,24 @@ struct nested_vmx {
 /* Posted-Interrupt Descriptor */
 struct pi_desc {
u32 pir[8]; /* Posted interrupt requested */
-   u32 control;/* bit 0 of control is outstanding notification bit */
-   u32 rsvd[7];
+   union {
+   struct {
+   /* bit 256 - Outstanding Notification */
+   u16 on  : 1,
+   /* bit 257 - Suppress Notification */
+   sn  : 1,
+   /* bit 271:258 - Reserved */
+   rsvd_1  : 14;
+   /* bit 279:272 - Notification Vector */
+   u8  nv;
+   /* bit 287:280 - Reserved */
+   u8  rsvd_2;
+   /* bit 319:288 - Notification Destination */
+   u32 ndst;
+   };
+   u64 control;
+   };
+   u32 rsvd[6];
 } __aligned(64);
 
 static bool pi_test_and_set_on(struct pi_desc *pi_desc)
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v9 11/18] KVM: make kvm_set_msi_irq() public

2015-09-18 Thread Feng Wu
Make kvm_set_msi_irq() public, we can use this function outside.

Signed-off-by: Feng Wu <feng...@intel.com>
Reviewed-by: Paolo Bonzini <pbonz...@redhat.com>
---
v8:
- Export kvm_set_msi_irq() so we can use it in vmx code

 arch/x86/include/asm/kvm_host.h | 4 
 arch/x86/kvm/irq_comm.c | 5 +++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index af11bca..daa6126 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -175,6 +175,8 @@ enum {
  */
 #define KVM_APIC_PV_EOI_PENDING1
 
+struct kvm_kernel_irq_routing_entry;
+
 /*
  * We don't want allocation failures within the mmu code, so we preallocate
  * enough memory for a single page fault in a cache.
@@ -1207,4 +1209,6 @@ int x86_set_memory_region(struct kvm *kvm,
 bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
 struct kvm_vcpu **dest_vcpu);
 
+void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+struct kvm_lapic_irq *irq);
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index f86a0da..4f6fa67 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -91,8 +91,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct 
kvm_lapic *src,
return r;
 }
 
-static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
-  struct kvm_lapic_irq *irq)
+void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+struct kvm_lapic_irq *irq)
 {
trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
 
@@ -108,6 +108,7 @@ static inline void kvm_set_msi_irq(struct 
kvm_kernel_irq_routing_entry *e,
irq->level = 1;
irq->shorthand = 0;
 }
+EXPORT_SYMBOL_GPL(kvm_set_msi_irq);
 
 int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level, bool line_status)
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v9 08/18] KVM: Add some helper functions for Posted-Interrupts

2015-09-18 Thread Feng Wu
This patch adds some helper functions to manipulate the
Posted-Interrupts Descriptor.

Signed-off-by: Feng Wu <feng...@intel.com>
Reviewed-by: Paolo Bonzini <pbonz...@redhat.com>
---
 arch/x86/kvm/vmx.c | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 271dd70..316f9bf 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -443,6 +443,8 @@ struct nested_vmx {
 };
 
 #define POSTED_INTR_ON  0
+#define POSTED_INTR_SN  1
+
 /* Posted-Interrupt Descriptor */
 struct pi_desc {
u32 pir[8]; /* Posted interrupt requested */
@@ -483,6 +485,30 @@ static int pi_test_and_set_pir(int vector, struct pi_desc 
*pi_desc)
return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
 }
 
+static void pi_clear_sn(struct pi_desc *pi_desc)
+{
+   return clear_bit(POSTED_INTR_SN,
+   (unsigned long *)&pi_desc->control);
+}
+
+static void pi_set_sn(struct pi_desc *pi_desc)
+{
+   return set_bit(POSTED_INTR_SN,
+   (unsigned long *)&pi_desc->control);
+}
+
+static int pi_test_on(struct pi_desc *pi_desc)
+{
+   return test_bit(POSTED_INTR_ON,
+   (unsigned long *)&pi_desc->control);
+}
+
+static int pi_test_sn(struct pi_desc *pi_desc)
+{
+   return test_bit(POSTED_INTR_SN,
+   (unsigned long *)&pi_desc->control);
+}
+
 struct vcpu_vmx {
struct kvm_vcpu   vcpu;
unsigned long host_rsp;
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v9 04/18] KVM: create kvm_irqfd.h

2015-09-18 Thread Feng Wu
From: Eric Auger 

Move _irqfd_resampler and _irqfd struct declarations in a new
public header: kvm_irqfd.h. They are respectively renamed into
kvm_kernel_irqfd_resampler and kvm_kernel_irqfd. Those datatypes
will be used by architecture specific code, in the context of
IRQ bypass manager integration.

Signed-off-by: Eric Auger 
---
 include/linux/kvm_irqfd.h | 69 ++
 virt/kvm/eventfd.c| 95 ---
 2 files changed, 92 insertions(+), 72 deletions(-)
 create mode 100644 include/linux/kvm_irqfd.h

diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h
new file mode 100644
index 000..f926b39
--- /dev/null
+++ b/include/linux/kvm_irqfd.h
@@ -0,0 +1,69 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * irqfd: Allows an fd to be used to inject an interrupt to the guest
+ * Credit goes to Avi Kivity for the original idea.
+ */
+
+#ifndef __LINUX_KVM_IRQFD_H
+#define __LINUX_KVM_IRQFD_H
+
+#include 
+#include 
+
+/*
+ * Resampling irqfds are a special variety of irqfds used to emulate
+ * level triggered interrupts.  The interrupt is asserted on eventfd
+ * trigger.  On acknowledgment through the irq ack notifier, the
+ * interrupt is de-asserted and userspace is notified through the
+ * resamplefd.  All resamplers on the same gsi are de-asserted
+ * together, so we don't need to track the state of each individual
+ * user.  We can also therefore share the same irq source ID.
+ */
+struct kvm_kernel_irqfd_resampler {
+   struct kvm *kvm;
+   /*
+* List of resampling struct _irqfd objects sharing this gsi.
+* RCU list modified under kvm->irqfds.resampler_lock
+*/
+   struct list_head list;
+   struct kvm_irq_ack_notifier notifier;
+   /*
+* Entry in list of kvm->irqfd.resampler_list.  Use for sharing
+* resamplers among irqfds on the same gsi.
+* Accessed and modified under kvm->irqfds.resampler_lock
+*/
+   struct list_head link;
+};
+
+struct kvm_kernel_irqfd {
+   /* Used for MSI fast-path */
+   struct kvm *kvm;
+   wait_queue_t wait;
+   /* Update side is protected by irqfds.lock */
+   struct kvm_kernel_irq_routing_entry irq_entry;
+   seqcount_t irq_entry_sc;
+   /* Used for level IRQ fast-path */
+   int gsi;
+   struct work_struct inject;
+   /* The resampler used by this irqfd (resampler-only) */
+   struct kvm_kernel_irqfd_resampler *resampler;
+   /* Eventfd notified on resample (resampler-only) */
+   struct eventfd_ctx *resamplefd;
+   /* Entry in list of irqfds for a resampler (resampler-only) */
+   struct list_head resampler_link;
+   /* Used for setup/shutdown */
+   struct eventfd_ctx *eventfd;
+   struct list_head list;
+   poll_table pt;
+   struct work_struct shutdown;
+};
+
+#endif /* __LINUX_KVM_IRQFD_H */
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 9ff4193..647ffb8 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -23,6 +23,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -39,68 +40,14 @@
 #include 
 
 #ifdef CONFIG_HAVE_KVM_IRQFD
-/*
- * 
- * irqfd: Allows an fd to be used to inject an interrupt to the guest
- *
- * Credit goes to Avi Kivity for the original idea.
- * 
- */
-
-/*
- * Resampling irqfds are a special variety of irqfds used to emulate
- * level triggered interrupts.  The interrupt is asserted on eventfd
- * trigger.  On acknowledgement through the irq ack notifier, the
- * interrupt is de-asserted and userspace is notified through the
- * resamplefd.  All resamplers on the same gsi are de-asserted
- * together, so we don't need to track the state of each individual
- * user.  We can also therefore share the same irq source ID.
- */
-struct _irqfd_resampler {
-   struct kvm *kvm;
-   /*
-* List of resampling struct _irqfd objects sharing this gsi.
-* RCU list modified under kvm->irqfds.resampler_lock
-*/
-   struct list_head list;
-   struct kvm_irq_ack_notifier notifier;
-   /*
-* Entry in list of kvm->irqfd.resampler_list.  Use for sharing
-* resamplers among irqfds on the same gsi.
-* Accessed and modified under kvm->irqfds.resampler_lock
-*/
-   struct list_head link;
-};
-
-struct _irqfd {

[PATCH v9 03/18] KVM: arm/arm64: select IRQ_BYPASS_MANAGER

2015-09-18 Thread Feng Wu
From: Eric Auger <eric.au...@linaro.org>

Select IRQ_BYPASS_MANAGER when CONFIG_KVM is set
Also add compilation of virt/lib.

Signed-off-by: Eric Auger <eric.au...@linaro.org>
Signed-off-by: Feng Wu <feng...@intel.com>
---
v3 -> v4:
- add compilation of virt/lib in arm/arm64 KVM

v2 -> v3:
- [Feng Wu] Correct a typo in 'arch/arm64/kvm/Kconfig'

v1 -> v2:
- also set IRQ_BYPASS_MANAGER for arm64

 arch/arm/kvm/Kconfig| 2 ++
 arch/arm/kvm/Makefile   | 1 +
 arch/arm64/kvm/Kconfig  | 2 ++
 arch/arm64/kvm/Makefile | 1 +
 4 files changed, 6 insertions(+)

diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index bfb915d..3c565b9 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -3,6 +3,7 @@
 #
 
 source "virt/kvm/Kconfig"
+source "virt/lib/Kconfig"
 
 menuconfig VIRTUALIZATION
bool "Virtualization"
@@ -31,6 +32,7 @@ config KVM
select KVM_VFIO
select HAVE_KVM_EVENTFD
select HAVE_KVM_IRQFD
+   select IRQ_BYPASS_MANAGER
depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER
---help---
  Support hosting virtualized guest machines.
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index c5eef02c..a6a41dd 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -24,3 +24,4 @@ obj-y += $(KVM)/arm/vgic.o
 obj-y += $(KVM)/arm/vgic-v2.o
 obj-y += $(KVM)/arm/vgic-v2-emul.o
 obj-y += $(KVM)/arm/arch_timer.o
+obj-y += ../../../virt/lib/
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index bfffe8f..2509539 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -3,6 +3,7 @@
 #
 
 source "virt/kvm/Kconfig"
+source "virt/lib/Kconfig"
 
 menuconfig VIRTUALIZATION
bool "Virtualization"
@@ -31,6 +32,7 @@ config KVM
select KVM_VFIO
select HAVE_KVM_EVENTFD
select HAVE_KVM_IRQFD
+   select IRQ_BYPASS_MANAGER
---help---
  Support hosting virtualized guest machines.
 
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index f90f4aa..55eec69 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -27,3 +27,4 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o
 kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v3-switch.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
+kvm-$(CONFIG_KVM_ARM_HOST) += ../../../virt/lib/
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v9 12/18] vfio: Register/unregister irq_bypass_producer

2015-09-18 Thread Feng Wu
This patch adds the registration/unregistration of an
irq_bypass_producer for MSI/MSIx on vfio pci devices.

Signed-off-by: Feng Wu <feng...@intel.com>
---
v8:
- Merge "[PATCH v7 08/17] vfio: Select IRQ_BYPASS_MANAGER for vfio PCI devices"
  into this patch.

v6:
- Make the add_consumer and del_consumer callbacks static
- Remove pointless INIT_LIST_HEAD to 'vdev->ctx[vector].producer.node)'
- Use dev_info instead of WARN_ON() when irq_bypass_register_producer fails
- Remove optional dummy callbacks for irq producer

 drivers/vfio/pci/Kconfig| 1 +
 drivers/vfio/pci/vfio_pci_intrs.c   | 9 +
 drivers/vfio/pci/vfio_pci_private.h | 2 ++
 3 files changed, 12 insertions(+)

diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
index 579d83b..02912f1 100644
--- a/drivers/vfio/pci/Kconfig
+++ b/drivers/vfio/pci/Kconfig
@@ -2,6 +2,7 @@ config VFIO_PCI
tristate "VFIO support for PCI devices"
depends on VFIO && PCI && EVENTFD
select VFIO_VIRQFD
+   select IRQ_BYPASS_MANAGER
help
  Support for the PCI VFIO bus driver.  This is required to make
  use of PCI drivers using the VFIO framework.
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c 
b/drivers/vfio/pci/vfio_pci_intrs.c
index 1f577b4..c65299d 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -319,6 +319,7 @@ static int vfio_msi_set_vector_signal(struct 
vfio_pci_device *vdev,
 
if (vdev->ctx[vector].trigger) {
free_irq(irq, vdev->ctx[vector].trigger);
+   irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
kfree(vdev->ctx[vector].name);
eventfd_ctx_put(vdev->ctx[vector].trigger);
vdev->ctx[vector].trigger = NULL;
@@ -360,6 +361,14 @@ static int vfio_msi_set_vector_signal(struct 
vfio_pci_device *vdev,
return ret;
}
 
+   vdev->ctx[vector].producer.token = trigger;
+   vdev->ctx[vector].producer.irq = irq;
+   ret = irq_bypass_register_producer(&vdev->ctx[vector].producer);
+   if (unlikely(ret))
+   dev_info(&pdev->dev,
+   "irq bypass producer (token %p) registeration fails: %d\n",
+   vdev->ctx[vector].producer.token, ret);
+
vdev->ctx[vector].trigger = trigger;
 
return 0;
diff --git a/drivers/vfio/pci/vfio_pci_private.h 
b/drivers/vfio/pci/vfio_pci_private.h
index ae0e1b4..0e7394f 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -13,6 +13,7 @@
 
 #include 
 #include 
+#include 
 
 #ifndef VFIO_PCI_PRIVATE_H
 #define VFIO_PCI_PRIVATE_H
@@ -29,6 +30,7 @@ struct vfio_pci_irq_ctx {
struct virqfd   *mask;
char*name;
boolmasked;
+   struct irq_bypass_producer  producer;
 };
 
 struct vfio_pci_device {
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v9 14/18] KVM: Implement IRQ bypass consumer callbacks for x86

2015-09-18 Thread Feng Wu
Implement the following callbacks for x86:

- kvm_arch_irq_bypass_add_producer
- kvm_arch_irq_bypass_del_producer
- kvm_arch_irq_bypass_stop: dummy callback
- kvm_arch_irq_bypass_resume: dummy callback

and set CONFIG_HAVE_KVM_IRQ_BYPASS for x86.

Signed-off-by: Feng Wu <feng...@intel.com>
---
v8:
- Move the weak irq bypass stop and irq bypass start to this patch.
- Call kvm_x86_ops->update_pi_irte() instead of kvm_arch_update_pi_irte().

 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/Kconfig|  1 +
 arch/x86/kvm/x86.c  | 44 +
 virt/kvm/eventfd.c  | 12 +++
 4 files changed, 58 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8c44286..0ddd353 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -24,6 +24,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index c951d44..b90776f 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -30,6 +30,7 @@ config KVM
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQFD
select IRQ_BYPASS_MANAGER
+   select HAVE_KVM_IRQ_BYPASS
select HAVE_KVM_IRQ_ROUTING
select HAVE_KVM_EVENTFD
select KVM_APIC_ARCHITECTURE
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9dcd501..79dac02 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -50,6 +50,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 
 #define CREATE_TRACE_POINTS
@@ -8249,6 +8251,48 @@ bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
 }
 EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
 
+int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
+ struct irq_bypass_producer *prod)
+{
+   struct kvm_kernel_irqfd *irqfd =
+   container_of(cons, struct kvm_kernel_irqfd, consumer);
+
+   if (kvm_x86_ops->update_pi_irte) {
+   irqfd->producer = prod;
+   return kvm_x86_ops->update_pi_irte(irqfd->kvm,
+   prod->irq, irqfd->gsi, 1);
+   }
+
+   return -EINVAL;
+}
+
+void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
+ struct irq_bypass_producer *prod)
+{
+   int ret;
+   struct kvm_kernel_irqfd *irqfd =
+   container_of(cons, struct kvm_kernel_irqfd, consumer);
+
+   if (!kvm_x86_ops->update_pi_irte) {
+   WARN_ON(irqfd->producer != NULL);
+   return;
+   }
+
+   WARN_ON(irqfd->producer != prod);
+   irqfd->producer = NULL;
+
+   /*
+* When producer of consumer is unregistered, we change back to
+* remapped mode, so we can re-use the current implementation
+* when the irq is masked/disabled or the consumer side (KVM
+* in this case) doesn't want to receive the interrupts.
+   */
+   ret = kvm_x86_ops->update_pi_irte(irqfd->kvm, prod->irq, irqfd->gsi, 0);
+   if (ret)
+   printk(KERN_INFO "irq bypass consumer (token %p) unregistration"
+  " fails: %d\n", irqfd->consumer.token, ret);
+}
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index d7a230f..c0a56a1 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -256,6 +256,18 @@ static void irqfd_update(struct kvm *kvm, struct 
kvm_kernel_irqfd *irqfd)
write_seqcount_end(&irqfd->irq_entry_sc);
 }
 
+#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+void __attribute__((weak)) kvm_arch_irq_bypass_stop(
+   struct irq_bypass_consumer *cons)
+{
+}
+
+void __attribute__((weak)) kvm_arch_irq_bypass_start(
+   struct irq_bypass_consumer *cons)
+{
+}
+#endif
+
 static int
 kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 {
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v9 16/18] KVM: Update Posted-Interrupts Descriptor when vCPU is preempted

2015-09-18 Thread Feng Wu
This patch updates the Posted-Interrupts Descriptor when vCPU
is preempted.

sched out:
- Set 'SN' to suppress future non-urgent interrupts posted for
the vCPU.

sched in:
- Clear 'SN'
- Change NDST if vCPU is scheduled to a different CPU
- Set 'NV' to POSTED_INTR_VECTOR

Signed-off-by: Feng Wu <feng...@intel.com>
---
v9:
- Check !kvm_arch_has_assigned_device(vcpu->kvm) first, then
  !irq_remapping_cap(IRQ_POSTING_CAP)

v8:
- Add two wrapper functions vmx_vcpu_pi_load() and vmx_vcpu_pi_put().
- Only handle VT-d PI related logic when the VM has assigned devices.

 arch/x86/kvm/vmx.c | 79 ++
 1 file changed, 79 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 11bda72..902a67d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1943,6 +1943,52 @@ static void vmx_load_host_state(struct vcpu_vmx *vmx)
preempt_enable();
 }
 
+static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
+{
+   struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+   struct pi_desc old, new;
+   unsigned int dest;
+
+   if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
+   !irq_remapping_cap(IRQ_POSTING_CAP))
+   return;
+
+   do {
+   old.control = new.control = pi_desc->control;
+
+   /*
+* If 'nv' field is POSTED_INTR_WAKEUP_VECTOR, there
+* are two possible cases:
+* 1. After running 'pre_block', context switch
+*happened. For this case, 'sn' was set in
+*vmx_vcpu_put(), so we need to clear it here.
+* 2. After running 'pre_block', we were blocked,
+*and woken up by some other guy. For this case,
+*we don't need to do anything, 'pi_post_block'
+*will do everything for us. However, we cannot
+*check whether it is case #1 or case #2 here
+*(maybe, not needed), so we also clear sn here,
+*I think it is not a big deal.
+*/
+   if (pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR) {
+   if (vcpu->cpu != cpu) {
+   dest = cpu_physical_id(cpu);
+
+   if (x2apic_enabled())
+   new.ndst = dest;
+   else
+   new.ndst = (dest << 8) & 0xFF00;
+   }
+
+   /* set 'NV' to 'notification vector' */
+   new.nv = POSTED_INTR_VECTOR;
+   }
+
+   /* Allow posting non-urgent interrupts */
+   new.sn = 0;
+   } while (cmpxchg(&pi_desc->control, old.control,
+   new.control) != old.control);
+}
 /*
  * Switches to specified vcpu, until a matching vcpu_put(), but assumes
  * vcpu mutex is already taken.
@@ -1993,10 +2039,27 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int 
cpu)
vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
vmx->loaded_vmcs->cpu = cpu;
}
+
+   vmx_vcpu_pi_load(vcpu, cpu);
+}
+
+static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
+{
+   struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+   if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
+   !irq_remapping_cap(IRQ_POSTING_CAP))
+   return;
+
+   /* Set SN when the vCPU is preempted */
+   if (vcpu->preempted)
+   pi_set_sn(pi_desc);
 }
 
 static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
 {
+   vmx_vcpu_pi_put(vcpu);
+
__vmx_load_host_state(to_vmx(vcpu));
if (!vmm_exclusive) {
__loaded_vmcs_clear(to_vmx(vcpu)->loaded_vmcs);
@@ -4426,6 +4489,22 @@ static inline bool 
kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_SMP
if (vcpu->mode == IN_GUEST_MODE) {
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+   /*
+* Currently, we don't support urgent interrupt,
+* all interrupts are recognized as non-urgent
+* interrupt, so we cannot post interrupts when
+* 'SN' is set.
+*
+* If the vcpu is in guest mode, it means it is
+* running instead of being scheduled out and
+* waiting in the run queue, and that's the only
+* case when 'SN' is set currently, warning if
+* 'SN' is set.
+*/
+   WARN_ON_ONCE(pi_test_sn(&vmx->pi_desc));
+
apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
POSTED_INTR_VECTOR);
return true;
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body

[PATCH v9 06/18] KVM: eventfd: add irq bypass consumer management

2015-09-18 Thread Feng Wu
From: Eric Auger <eric.au...@linaro.org>

This patch adds the registration/unregistration of an
irq_bypass_consumer on irqfd assignment/deassignment.

Signed-off-by: Eric Auger <eric.au...@linaro.org>
Signed-off-by: Feng Wu <feng...@intel.com>
---
v4 -> v5:
- due to removal of static inline stubs, add
  #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
  around consumer registration/unregistration
- add pr_info when registration fails

v2 -> v3 (Feng Wu):
- Use kvm_arch_irq_bypass_start
- Remove kvm_arch_irq_bypass_update
- Add member 'struct irq_bypass_producer *producer' in
  'struct kvm_kernel_irqfd', it is needed by posted interrupt.
- Remove 'irq_bypass_unregister_consumer' in kvm_irqfd_deassign()

v1 -> v2:
- populate of kvm and gsi removed
- unregister the consumer on irqfd_shutdown

 include/linux/kvm_irqfd.h |  2 ++
 virt/kvm/eventfd.c| 15 +++
 2 files changed, 17 insertions(+)

diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h
index f926b39..0c1de05 100644
--- a/include/linux/kvm_irqfd.h
+++ b/include/linux/kvm_irqfd.h
@@ -64,6 +64,8 @@ struct kvm_kernel_irqfd {
struct list_head list;
poll_table pt;
struct work_struct shutdown;
+   struct irq_bypass_consumer consumer;
+   struct irq_bypass_producer *producer;
 };
 
 #endif /* __LINUX_KVM_IRQFD_H */
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 647ffb8..d7a230f 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -140,6 +141,9 @@ irqfd_shutdown(struct work_struct *work)
/*
 * It is now safe to release the object's resources
 */
+#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+   irq_bypass_unregister_consumer(&irqfd->consumer);
+#endif
eventfd_ctx_put(irqfd->eventfd);
kfree(irqfd);
 }
@@ -379,6 +383,17 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 * we might race against the POLLHUP
 */
fdput(f);
+#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+   irqfd->consumer.token = (void *)irqfd->eventfd;
+   irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer;
+   irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer;
+   irqfd->consumer.stop = kvm_arch_irq_bypass_stop;
+   irqfd->consumer.start = kvm_arch_irq_bypass_start;
+   ret = irq_bypass_register_consumer(&irqfd->consumer);
+   if (ret)
+   pr_info("irq bypass consumer (token %p) registration fails: 
%d\n",
+   irqfd->consumer.token, ret);
+#endif
 
return 0;
 
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v9 01/18] virt: IRQ bypass manager

2015-09-18 Thread Feng Wu
From: Alex Williamson <alex.william...@redhat.com>

When a physical I/O device is assigned to a virtual machine through
facilities like VFIO and KVM, the interrupt for the device generally
bounces through the host system before being injected into the VM.
However, hardware technologies exist that often allow the host to be
bypassed for some of these scenarios.  Intel Posted Interrupts allow
the specified physical edge interrupts to be directly injected into a
guest when delivered to a physical processor while the vCPU is
running.  ARM IRQ Forwarding allows forwarded physical interrupts to
be directly deactivated by the guest.

The IRQ bypass manager here is meant to provide the shim to connect
interrupt producers, generally the host physical device driver, with
interrupt consumers, generally the hypervisor, in order to configure
these bypass mechanisms.  To do this, we base the connection on a
shared, opaque token.  For KVM-VFIO this is expected to be an
eventfd_ctx since this is the connection we already use to connect an
eventfd to an irqfd on the in-kernel path.  When a producer and
consumer with matching tokens are found, callbacks via both registered
participants allow the bypass facilities to be automatically enabled.

Signed-off-by: Alex Williamson <alex.william...@redhat.com>
Reviewed-by: Eric Auger <eric.au...@linaro.org>
Tested-by: Eric Auger <eric.au...@linaro.org>
Tested-by: Feng Wu <feng...@intel.com>
---
v4: All producer callbacks are optional, as with Intel PI, it's
possible for the producer to be blissfully unaware of the bypass.

 MAINTAINERS   |   7 ++
 include/linux/irqbypass.h |  90 
 virt/lib/Kconfig  |   2 +
 virt/lib/Makefile |   1 +
 virt/lib/irqbypass.c  | 257 ++
 5 files changed, 357 insertions(+)
 create mode 100644 include/linux/irqbypass.h
 create mode 100644 virt/lib/Kconfig
 create mode 100644 virt/lib/Makefile
 create mode 100644 virt/lib/irqbypass.c

diff --git a/MAINTAINERS b/MAINTAINERS
index a9ae6c1..10c8b2f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10963,6 +10963,13 @@ L: net...@vger.kernel.org
 S: Maintained
 F: drivers/net/ethernet/via/via-velocity.*
 
+VIRT LIB
+M: Alex Williamson <alex.william...@redhat.com>
+M: Paolo Bonzini <pbonz...@redhat.com>
+L: kvm@vger.kernel.org
+S: Supported
+F: virt/lib/
+
 VIVID VIRTUAL VIDEO DRIVER
 M: Hans Verkuil <hverk...@xs4all.nl>
 L: linux-me...@vger.kernel.org
diff --git a/include/linux/irqbypass.h b/include/linux/irqbypass.h
new file mode 100644
index 000..1551b5b
--- /dev/null
+++ b/include/linux/irqbypass.h
@@ -0,0 +1,90 @@
+/*
+ * IRQ offload/bypass manager
+ *
+ * Copyright (C) 2015 Red Hat, Inc.
+ * Copyright (c) 2015 Linaro Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef IRQBYPASS_H
+#define IRQBYPASS_H
+
+#include 
+
+struct irq_bypass_consumer;
+
+/*
+ * Theory of operation
+ *
+ * The IRQ bypass manager is a simple set of lists and callbacks that allows
+ * IRQ producers (ex. physical interrupt sources) to be matched to IRQ
+ * consumers (ex. virtualization hardware that allows IRQ bypass or offload)
+ * via a shared token (ex. eventfd_ctx).  Producers and consumers register
+ * independently.  When a token match is found, the optional @stop callback
+ * will be called for each participant.  The pair will then be connected via
+ * the @add_* callbacks, and finally the optional @start callback will allow
+ * any final coordination.  When either participant is unregistered, the
+ * process is repeated using the @del_* callbacks in place of the @add_*
+ * callbacks.  Match tokens must be unique per producer/consumer, 1:N pairings
+ * are not supported.
+ */
+
+/**
+ * struct irq_bypass_producer - IRQ bypass producer definition
+ * @node: IRQ bypass manager private list management
+ * @token: opaque token to match between producer and consumer
+ * @irq: Linux IRQ number for the producer device
+ * @add_consumer: Connect the IRQ producer to an IRQ consumer (optional)
+ * @del_consumer: Disconnect the IRQ producer from an IRQ consumer (optional)
+ * @stop: Perform any quiesce operations necessary prior to add/del (optional)
+ * @start: Perform any startup operations necessary after add/del (optional)
+ *
+ * The IRQ bypass producer structure represents an interrupt source for
+ * participation in possible host bypass, for instance an interrupt vector
+ * for a physical device assigned to a VM.
+ */
+struct irq_bypass_producer {
+   struct list_head node;
+   void *token;
+   int irq;
+   int (*add_consumer)(struct irq_bypass_producer *,
+   struct irq_bypass_consumer *);
+   void (*del_consumer)(struct irq_bypass_producer *,
+  

[PATCH v9 05/18] KVM: introduce kvm_arch functions for IRQ bypass

2015-09-18 Thread Feng Wu
From: Eric Auger <eric.au...@linaro.org>

This patch introduces
- kvm_arch_irq_bypass_add_producer
- kvm_arch_irq_bypass_del_producer
- kvm_arch_irq_bypass_stop
- kvm_arch_irq_bypass_start

They make it possible to specialize the KVM IRQ bypass consumer in
case CONFIG_HAVE_KVM_IRQ_BYPASS is set.

Signed-off-by: Eric Auger <eric.au...@linaro.org>
Signed-off-by: Feng Wu <feng...@intel.com>
---
v4 -> v5:
- remove static inline stub functions

v2 -> v3 (Feng Wu):
- use 'kvm_arch_irq_bypass_start' instead of 'kvm_arch_irq_bypass_resume'
- Remove 'kvm_arch_irq_bypass_update', which does not need to be
  an irqbypass callback per Alex's comments.
- Make kvm_arch_irq_bypass_add_producer return 'int'

v1 -> v2:
- use CONFIG_KVM_HAVE_IRQ_BYPASS instead CONFIG_IRQ_BYPASS_MANAGER
- rename all functions according to Paolo's proposal
- add kvm_arch_irq_bypass_update according to Feng's need

 include/linux/kvm_host.h | 10 ++
 virt/kvm/Kconfig |  3 +++
 2 files changed, 13 insertions(+)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 05e99b8..5ac8d21 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -24,6 +24,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -1151,5 +1152,14 @@ static inline void kvm_vcpu_set_dy_eligible(struct 
kvm_vcpu *vcpu, bool val)
 {
 }
 #endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
+
+#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *,
+  struct irq_bypass_producer *);
+void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *,
+  struct irq_bypass_producer *);
+void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *);
+void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *);
+#endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */
 #endif
 
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index e2c876d..9f8014d 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -47,3 +47,6 @@ config KVM_GENERIC_DIRTYLOG_READ_PROTECT
 config KVM_COMPAT
def_bool y
depends on COMPAT && !S390
+
+config HAVE_KVM_IRQ_BYPASS
+   bool
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v9 02/18] KVM: x86: select IRQ_BYPASS_MANAGER

2015-09-18 Thread Feng Wu
Select IRQ_BYPASS_MANAGER for x86 when CONFIG_KVM is set

Signed-off-by: Feng Wu <feng...@intel.com>
---
 arch/x86/kvm/Kconfig  | 2 ++
 arch/x86/kvm/Makefile | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index d8a1d56..c951d44 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -3,6 +3,7 @@
 #
 
 source "virt/kvm/Kconfig"
+source "virt/lib/Kconfig"
 
 menuconfig VIRTUALIZATION
bool "Virtualization"
@@ -28,6 +29,7 @@ config KVM
select ANON_INODES
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQFD
+   select IRQ_BYPASS_MANAGER
select HAVE_KVM_IRQ_ROUTING
select HAVE_KVM_EVENTFD
select KVM_APIC_ARCHITECTURE
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 67d215c..05cc2d7 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -6,6 +6,9 @@ CFLAGS_svm.o := -I.
 CFLAGS_vmx.o := -I.
 
 KVM := ../../../virt/kvm
+LIB := ../../../virt/lib
+
+obj-$(CONFIG_IRQ_BYPASS_MANAGER)   += $(LIB)/
 
 kvm-y  += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
$(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v8 06/13] vfio: Register/unregister irq_bypass_producer

2015-09-16 Thread Feng Wu
This patch adds the registration/unregistration of an
irq_bypass_producer for MSI/MSIx on vfio pci devices.

Signed-off-by: Feng Wu <feng...@intel.com>
---
v8:
- Merge "[PATCH v7 08/17] vfio: Select IRQ_BYPASS_MANAGER for vfio PCI devices"
  into this patch.

v6:
- Make the add_consumer and del_consumer callbacks static
- Remove pointless INIT_LIST_HEAD to 'vdev->ctx[vector].producer.node)'
- Use dev_info instead of WARN_ON() when irq_bypass_register_producer fails
- Remove optional dummy callbacks for irq producer

 drivers/vfio/pci/Kconfig| 1 +
 drivers/vfio/pci/vfio_pci_intrs.c   | 9 +
 drivers/vfio/pci/vfio_pci_private.h | 2 ++
 3 files changed, 12 insertions(+)

diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
index 579d83b..02912f1 100644
--- a/drivers/vfio/pci/Kconfig
+++ b/drivers/vfio/pci/Kconfig
@@ -2,6 +2,7 @@ config VFIO_PCI
tristate "VFIO support for PCI devices"
depends on VFIO && PCI && EVENTFD
select VFIO_VIRQFD
+   select IRQ_BYPASS_MANAGER
help
  Support for the PCI VFIO bus driver.  This is required to make
  use of PCI drivers using the VFIO framework.
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c 
b/drivers/vfio/pci/vfio_pci_intrs.c
index 1f577b4..c65299d 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -319,6 +319,7 @@ static int vfio_msi_set_vector_signal(struct 
vfio_pci_device *vdev,
 
if (vdev->ctx[vector].trigger) {
free_irq(irq, vdev->ctx[vector].trigger);
+   irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
kfree(vdev->ctx[vector].name);
eventfd_ctx_put(vdev->ctx[vector].trigger);
vdev->ctx[vector].trigger = NULL;
@@ -360,6 +361,14 @@ static int vfio_msi_set_vector_signal(struct 
vfio_pci_device *vdev,
return ret;
}
 
+   vdev->ctx[vector].producer.token = trigger;
+   vdev->ctx[vector].producer.irq = irq;
+   ret = irq_bypass_register_producer(&vdev->ctx[vector].producer);
+   if (unlikely(ret))
+   dev_info(&pdev->dev,
+   "irq bypass producer (token %p) registeration fails: %d\n",
+   vdev->ctx[vector].producer.token, ret);
+
vdev->ctx[vector].trigger = trigger;
 
return 0;
diff --git a/drivers/vfio/pci/vfio_pci_private.h 
b/drivers/vfio/pci/vfio_pci_private.h
index ae0e1b4..0e7394f 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -13,6 +13,7 @@
 
 #include 
 #include 
+#include 
 
 #ifndef VFIO_PCI_PRIVATE_H
 #define VFIO_PCI_PRIVATE_H
@@ -29,6 +30,7 @@ struct vfio_pci_irq_ctx {
struct virqfd   *mask;
char*name;
boolmasked;
+   struct irq_bypass_producer  producer;
 };
 
 struct vfio_pci_device {
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v8 05/13] KVM: make kvm_set_msi_irq() public

2015-09-16 Thread Feng Wu
Make kvm_set_msi_irq() public so that it can be used outside irq_comm.c.

Signed-off-by: Feng Wu <feng...@intel.com>
Reviewed-by: Paolo Bonzini <pbonz...@redhat.com>
---
v8:
- Export kvm_set_msi_irq() so we can use it in vmx code

 arch/x86/include/asm/kvm_host.h | 4 
 arch/x86/kvm/irq_comm.c | 5 +++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index af11bca..daa6126 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -175,6 +175,8 @@ enum {
  */
 #define KVM_APIC_PV_EOI_PENDING1
 
+struct kvm_kernel_irq_routing_entry;
+
 /*
  * We don't want allocation failures within the mmu code, so we preallocate
  * enough memory for a single page fault in a cache.
@@ -1207,4 +1209,6 @@ int x86_set_memory_region(struct kvm *kvm,
 bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
 struct kvm_vcpu **dest_vcpu);
 
+void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+struct kvm_lapic_irq *irq);
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 97ba1d6..add52d8 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -91,8 +91,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct 
kvm_lapic *src,
return r;
 }
 
-static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
-  struct kvm_lapic_irq *irq)
+void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+struct kvm_lapic_irq *irq)
 {
trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
 
@@ -108,6 +108,7 @@ static inline void kvm_set_msi_irq(struct 
kvm_kernel_irq_routing_entry *e,
irq->level = 1;
irq->shorthand = 0;
 }
+EXPORT_SYMBOL_GPL(kvm_set_msi_irq);
 
 int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level, bool line_status)
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v8 02/13] KVM: Add some helper functions for Posted-Interrupts

2015-09-16 Thread Feng Wu
This patch adds some helper functions to manipulate the
Posted-Interrupts Descriptor.

Signed-off-by: Feng Wu <feng...@intel.com>
Reviewed-by: Paolo Bonzini <pbonz...@redhat.com>
---
 arch/x86/kvm/vmx.c | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 271dd70..316f9bf 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -443,6 +443,8 @@ struct nested_vmx {
 };
 
 #define POSTED_INTR_ON  0
+#define POSTED_INTR_SN  1
+
 /* Posted-Interrupt Descriptor */
 struct pi_desc {
u32 pir[8]; /* Posted interrupt requested */
@@ -483,6 +485,30 @@ static int pi_test_and_set_pir(int vector, struct pi_desc 
*pi_desc)
return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
 }
 
+static void pi_clear_sn(struct pi_desc *pi_desc)
+{
+   return clear_bit(POSTED_INTR_SN,
+   (unsigned long *)&pi_desc->control);
+}
+
+static void pi_set_sn(struct pi_desc *pi_desc)
+{
+   return set_bit(POSTED_INTR_SN,
+   (unsigned long *)&pi_desc->control);
+}
+
+static int pi_test_on(struct pi_desc *pi_desc)
+{
+   return test_bit(POSTED_INTR_ON,
+   (unsigned long *)&pi_desc->control);
+}
+
+static int pi_test_sn(struct pi_desc *pi_desc)
+{
+   return test_bit(POSTED_INTR_SN,
+   (unsigned long *)&pi_desc->control);
+}
+
 struct vcpu_vmx {
struct kvm_vcpu   vcpu;
unsigned long host_rsp;
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v8 04/13] KVM: Make struct kvm_irq_routing_table accessible

2015-09-16 Thread Feng Wu
Move struct kvm_irq_routing_table from irqchip.c to kvm_host.h,
so we can use it outside of irqchip.c.

Signed-off-by: Feng Wu <feng...@intel.com>
Reviewed-by: Paolo Bonzini <pbonz...@redhat.com>
---
 include/linux/kvm_host.h | 14 ++
 virt/kvm/irqchip.c   | 10 --
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5ac8d21..5f183fb 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -328,6 +328,20 @@ struct kvm_kernel_irq_routing_entry {
struct hlist_node link;
 };
 
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+
+struct kvm_irq_routing_table {
+   int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
+   u32 nr_rt_entries;
+   /*
+* Array indexed by gsi. Each entry contains list of irq chips
+* the gsi is connected to.
+*/
+   struct hlist_head map[0];
+};
+
+#endif
+
 #ifndef KVM_PRIVATE_MEM_SLOTS
 #define KVM_PRIVATE_MEM_SLOTS 0
 #endif
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 21c1424..2cf45d3 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -31,16 +31,6 @@
 #include 
 #include "irq.h"
 
-struct kvm_irq_routing_table {
-   int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
-   u32 nr_rt_entries;
-   /*
-* Array indexed by gsi. Each entry contains list of irq chips
-* the gsi is connected to.
-*/
-   struct hlist_head map[0];
-};
-
 int kvm_irq_map_gsi(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *entries, int gsi)
 {
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v8 10/13] KVM: Update Posted-Interrupts Descriptor when vCPU is preempted

2015-09-16 Thread Feng Wu
This patch updates the Posted-Interrupts Descriptor when vCPU
is preempted.

sched out:
- Set 'SN' to suppress future non-urgent interrupts posted for
the vCPU.

sched in:
- Clear 'SN'
- Change NDST if vCPU is scheduled to a different CPU
- Set 'NV' to POSTED_INTR_VECTOR

Signed-off-by: Feng Wu <feng...@intel.com>
---
v8:
- Add two wrapper functions vmx_vcpu_pi_load() and vmx_vcpu_pi_put().
- Only handle VT-d PI related logic when the VM has assigned devices.

 arch/x86/kvm/vmx.c | 63 ++
 1 file changed, 63 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5a25651..5ceb280 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1943,6 +1943,52 @@ static void vmx_load_host_state(struct vcpu_vmx *vmx)
preempt_enable();
 }
 
+static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
+{
+   struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+   struct pi_desc old, new;
+   unsigned int dest;
+
+   if (!irq_remapping_cap(IRQ_POSTING_CAP) ||
+   (!kvm_arch_has_assigned_device(vcpu->kvm)))
+   return;
+
+   do {
+   old.control = new.control = pi_desc->control;
+
+   /*
+* If 'nv' field is POSTED_INTR_WAKEUP_VECTOR, there
+* are two possible cases:
+* 1. After running 'pi_pre_block', context switch
+*happened. For this case, 'sn' was set in
+*vmx_vcpu_put(), so we need to clear it here.
+* 2. After running 'pi_pre_block', we were blocked,
+*and woken up by some other guy. For this case,
+*we don't need to do anything, 'pi_post_block'
+*will do everything for us. However, we cannot
+*check whether it is case #1 or case #2 here
+*(maybe, not needed), so we also clear sn here,
+*I think it is not a big deal.
+*/
+   if (pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR) {
+   if (vcpu->cpu != cpu) {
+   dest = cpu_physical_id(cpu);
+
+   if (x2apic_enabled())
+   new.ndst = dest;
+   else
+   new.ndst = (dest << 8) & 0xFF00;
+   }
+
+   /* set 'NV' to 'notification vector' */
+   new.nv = POSTED_INTR_VECTOR;
+   }
+
+   /* Allow posting non-urgent interrupts */
+   new.sn = 0;
+   } while (cmpxchg(&pi_desc->control, old.control,
+   new.control) != old.control);
+}
 /*
  * Switches to specified vcpu, until a matching vcpu_put(), but assumes
  * vcpu mutex is already taken.
@@ -1993,10 +2039,27 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int 
cpu)
vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
vmx->loaded_vmcs->cpu = cpu;
}
+
+   vmx_vcpu_pi_load(vcpu, cpu);
+}
+
+static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
+{
+   struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+   if (!irq_remapping_cap(IRQ_POSTING_CAP) ||
+   (!kvm_arch_has_assigned_device(vcpu->kvm)))
+   return;
+
+   /* Set SN when the vCPU is preempted */
+   if (vcpu->preempted)
+   pi_set_sn(pi_desc);
 }
 
 static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
 {
+   vmx_vcpu_pi_put(vcpu);
+
__vmx_load_host_state(to_vmx(vcpu));
if (!vmm_exclusive) {
__loaded_vmcs_clear(to_vmx(vcpu)->loaded_vmcs);
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v8 11/13] KVM: Update Posted-Interrupts Descriptor when vCPU is blocked

2015-09-16 Thread Feng Wu
This patch updates the Posted-Interrupts Descriptor when vCPU
is blocked.

pre-block:
- Add the vCPU to the blocked per-CPU list
- Set 'NV' to POSTED_INTR_WAKEUP_VECTOR

post-block:
- Remove the vCPU from the per-CPU list

Signed-off-by: Feng Wu <feng...@intel.com>
---
v8:
- Rename 'pi_pre_block' to 'pre_block'
- Rename 'pi_post_block' to 'post_block'
- Change some comments
- Only add the vCPU to the blocking list when the VM has assigned devices.

 arch/x86/include/asm/kvm_host.h |  13 
 arch/x86/kvm/vmx.c  | 157 +++-
 arch/x86/kvm/x86.c  |  53 +++---
 include/linux/kvm_host.h|   3 +
 virt/kvm/kvm_main.c |   3 +
 5 files changed, 217 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0ddd353..304fbb5 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -552,6 +552,8 @@ struct kvm_vcpu_arch {
 */
bool write_fault_to_shadow_pgtable;
 
+   bool halted;
+
/* set at EPT violation at this point */
unsigned long exit_qualification;
 
@@ -864,6 +866,17 @@ struct kvm_x86_ops {
/* pmu operations of sub-arch */
const struct kvm_pmu_ops *pmu_ops;
 
+   /*
+* Architecture specific hooks for vCPU blocking due to
+* HLT instruction.
+* Returns for .pre_block():
+*- 0 means continue to block the vCPU.
+*- 1 means we cannot block the vCPU since some event
+*happens during this period, such as, 'ON' bit in
+*posted-interrupts descriptor is set.
+*/
+   int (*pre_block)(struct kvm_vcpu *vcpu);
+   void (*post_block)(struct kvm_vcpu *vcpu);
int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
  uint32_t guest_irq, bool set);
 };
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5ceb280..9888c43 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -879,6 +879,13 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
 static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
 static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
 
+/*
+ * We maintian a per-CPU linked-list of vCPU, so in wakeup_handler() we
+ * can find which vCPU should be waken up.
+ */
+static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
+static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
+
 static unsigned long *vmx_io_bitmap_a;
 static unsigned long *vmx_io_bitmap_b;
 static unsigned long *vmx_msr_bitmap_legacy;
@@ -1959,10 +1966,10 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int 
cpu)
/*
 * If 'nv' field is POSTED_INTR_WAKEUP_VECTOR, there
 * are two possible cases:
-* 1. After running 'pi_pre_block', context switch
+* 1. After running 'pre_block', context switch
 *happened. For this case, 'sn' was set in
 *vmx_vcpu_put(), so we need to clear it here.
-* 2. After running 'pi_pre_block', we were blocked,
+* 2. After running 'pre_block', we were blocked,
 *and woken up by some other guy. For this case,
 *we don't need to do anything, 'pi_post_block'
 *will do everything for us. However, we cannot
@@ -2985,6 +2992,8 @@ static int hardware_enable(void)
return -EBUSY;
 
INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
+   INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
+   spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
 
/*
 * Now we can enable the vmclear operation in kdump
@@ -6105,6 +6114,25 @@ static void update_ple_window_actual_max(void)
ple_window_grow, INT_MIN);
 }
 
+/*
+ * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
+ */
+static void wakeup_handler(void)
+{
+   struct kvm_vcpu *vcpu;
+   int cpu = smp_processor_id();
+
+   spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+   list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu),
+   blocked_vcpu_list) {
+   struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+   if (pi_test_on(pi_desc) == 1)
+   kvm_vcpu_kick(vcpu);
+   }
+   spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+}
+
 static __init int hardware_setup(void)
 {
int r = -ENOMEM, i, msr;
@@ -6289,6 +6317,8 @@ static __init int hardware_setup(void)
kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
}
 
+   kvm_set_posted_intr_wakeup_handler(wakeup_handler);
+
return alloc_kvm_area();
 
 out8:
@@ -10414,6 +10444,126 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm 
*kvm,
 }
 
 /*
+ * This routine does the following things for vCPU which is going
+ * to be blocked i

[PATCH v8 09/13] KVM: Add an arch specific hooks in 'struct kvm_kernel_irqfd'

2015-09-16 Thread Feng Wu
This patch adds an arch-specific hook 'arch_update' in
'struct kvm_kernel_irqfd'. On Intel side, it is used to
update the IRTE when VT-d posted-interrupts is used.

Signed-off-by: Feng Wu <feng...@intel.com>
---
v8:
- Remove callback .arch_update()
- Remove kvm_arch_irqfd_init()
- Call kvm_arch_update_irqfd_routing() instead.

 arch/x86/kvm/x86.c   |  7 +++
 include/linux/kvm_host.h |  2 ++
 virt/kvm/eventfd.c   | 19 ++-
 3 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 79dac02..e189a94 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8293,6 +8293,13 @@ void kvm_arch_irq_bypass_del_producer(struct 
irq_bypass_consumer *cons,
   " fails: %d\n", irqfd->consumer.token, ret);
 }
 
+int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
+  uint32_t guest_irq, bool set)
+{
+   return !kvm_x86_ops->update_pi_irte ? -EINVAL :
+   kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set);
+}
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5f183fb..feba1fb 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1174,6 +1174,8 @@ void kvm_arch_irq_bypass_del_producer(struct 
irq_bypass_consumer *,
   struct irq_bypass_producer *);
 void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *);
 void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *);
+int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
+ uint32_t guest_irq, bool set);
 #endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */
 #endif
 
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index c0a56a1..89c9635 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -266,6 +266,12 @@ void __attribute__((weak)) kvm_arch_irq_bypass_start(
struct irq_bypass_consumer *cons)
 {
 }
+int  __attribute__((weak)) kvm_arch_update_irqfd_routing(
+   struct kvm *kvm, unsigned int host_irq,
+   uint32_t guest_irq, bool set)
+{
+   return 0;
+}
 #endif
 
 static int
@@ -582,13 +588,24 @@ kvm_irqfd_release(struct kvm *kvm)
  */
 void kvm_irq_routing_update(struct kvm *kvm)
 {
+   int ret;
struct kvm_kernel_irqfd *irqfd;
 
spin_lock_irq(&kvm->irqfds.lock);
 
-   list_for_each_entry(irqfd, &kvm->irqfds.items, list)
+   list_for_each_entry(irqfd, &kvm->irqfds.items, list) {
irqfd_update(kvm, irqfd);
 
+#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+   if (irqfd->producer) {
+   ret = kvm_arch_update_irqfd_routing(
+   irqfd->kvm, irqfd->producer->irq,
+   irqfd->gsi, 1);
+   WARN_ON(ret);
+   }
+#endif
+   }
+
spin_unlock_irq(&kvm->irqfds.lock);
 }
 
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v8 03/13] KVM: Define a new interface kvm_intr_is_single_vcpu()

2015-09-16 Thread Feng Wu
This patch defines a new interface kvm_intr_is_single_vcpu(),
which returns whether the interrupt is for a single CPU or not.

It is used by VT-d PI, since for now we only support single-CPU
interrupts. For lowest-priority interrupts, if the user configures
them via /proc/irq or uses irqbalance to make them single-CPU, we
can use PI to deliver the interrupts to that CPU. Full functionality
of lowest-priority support will be added later.

Signed-off-by: Feng Wu <feng...@intel.com>
---
v8:
- Some optimizations in kvm_intr_is_single_vcpu().
- Expose kvm_intr_is_single_vcpu() so we can use it in vmx code.
- Add kvm_intr_is_single_vcpu_fast() as the fast path to find
  the target vCPU for the single-destination interrupt

 arch/x86/include/asm/kvm_host.h |  3 ++
 arch/x86/kvm/irq_comm.c | 94 +
 arch/x86/kvm/lapic.c|  5 +--
 arch/x86/kvm/lapic.h|  2 +
 4 files changed, 101 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 49ec903..af11bca 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1204,4 +1204,7 @@ int __x86_set_memory_region(struct kvm *kvm,
 int x86_set_memory_region(struct kvm *kvm,
  const struct kvm_userspace_memory_region *mem);
 
+bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
+struct kvm_vcpu **dest_vcpu);
+
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 9efff9e..97ba1d6 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -297,6 +297,100 @@ out:
return r;
 }
 
+static bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm,
+struct kvm_lapic_irq *irq,
+struct kvm_vcpu **dest_vcpu)
+{
+   struct kvm_apic_map *map;
+   bool ret = false;
+   struct kvm_lapic *dst = NULL;
+
+   if (irq->shorthand)
+   return false;
+
+   rcu_read_lock();
+   map = rcu_dereference(kvm->arch.apic_map);
+
+   if (!map)
+   goto out;
+
+   if (irq->dest_mode == APIC_DEST_PHYSICAL) {
+   if (irq->dest_id == 0xFF)
+   goto out;
+
+   if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) {
+   WARN_ON_ONCE(1);
+   goto out;
+   }
+
+   dst = map->phys_map[irq->dest_id];
+   if (dst && kvm_apic_present(dst->vcpu))
+   *dest_vcpu = dst->vcpu;
+   else
+   goto out;
+   } else {
+   u16 cid;
+   unsigned long bitmap = 1;
+   int i, r = 0;
+
+   if (!kvm_apic_logical_map_valid(map)) {
+   WARN_ON_ONCE(1);
+   goto out;
+   }
+
+   apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
+
+   if (cid >= ARRAY_SIZE(map->logical_map)) {
+   WARN_ON_ONCE(1);
+   goto out;
+   }
+
+   for_each_set_bit(i, &bitmap, 16) {
+   dst = map->logical_map[cid][i];
+   if (++r == 2)
+   goto out;
+   }
+
+   if (dst && kvm_apic_present(dst->vcpu))
+   *dest_vcpu = dst->vcpu;
+   else
+   goto out;
+   }
+
+   ret = true;
+out:
+   rcu_read_unlock();
+   return ret;
+}
+
+
+bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
+struct kvm_vcpu **dest_vcpu)
+{
+   int i, r = 0;
+   struct kvm_vcpu *vcpu;
+
+   if (kvm_intr_is_single_vcpu_fast(kvm, irq, dest_vcpu))
+   return true;
+
+   kvm_for_each_vcpu(i, vcpu, kvm) {
+   if (!kvm_apic_present(vcpu))
+   continue;
+
+   if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand,
+   irq->dest_id, irq->dest_mode))
+   continue;
+
+   if (++r == 2)
+   return false;
+
+   *dest_vcpu = vcpu;
+   }
+
+   return r == 1;
+}
+EXPORT_SYMBOL_GPL(kvm_intr_is_single_vcpu);
+
 #define IOAPIC_ROUTING_ENTRY(irq) \
{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,  \
  .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } }
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 2a5ca97..9848cd50 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -136,13 +136,12 @@ static inline int kvm_apic_id(struct kvm_lapic *apic)
 /* The logical map is definitely wrong if we have multiple
  * modes at the same time.  (Physical map is always right.)
 

[PATCH v8 01/13] KVM: Extend struct pi_desc for VT-d Posted-Interrupts

2015-09-16 Thread Feng Wu
Extend struct pi_desc for VT-d Posted-Interrupts.

Signed-off-by: Feng Wu <feng...@intel.com>
---
 arch/x86/kvm/vmx.c | 20 ++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 83b7b5c..271dd70 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -446,8 +446,24 @@ struct nested_vmx {
 /* Posted-Interrupt Descriptor */
 struct pi_desc {
u32 pir[8]; /* Posted interrupt requested */
-   u32 control;/* bit 0 of control is outstanding notification bit */
-   u32 rsvd[7];
+   union {
+   struct {
+   /* bit 256 - Outstanding Notification */
+   u16 on  : 1,
+   /* bit 257 - Suppress Notification */
+   sn  : 1,
+   /* bit 271:258 - Reserved */
+   rsvd_1  : 14;
+   /* bit 279:272 - Notification Vector */
+   u8  nv;
+   /* bit 287:280 - Reserved */
+   u8  rsvd_2;
+   /* bit 319:288 - Notification Destination */
+   u32 ndst;
+   };
+   u64 control;
+   };
+   u32 rsvd[6];
 } __aligned(64);
 
 static bool pi_test_and_set_on(struct pi_desc *pi_desc)
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v8 13/13] iommu/vt-d: Add a command line parameter for VT-d posted-interrupts

2015-09-16 Thread Feng Wu
Enable VT-d Posted-Interrupts and add a command line
parameter for it.

Signed-off-by: Feng Wu <feng...@intel.com>
Reviewed-by: Paolo Bonzini <pbonz...@redhat.com>
---
 Documentation/kernel-parameters.txt |  1 +
 drivers/iommu/irq_remapping.c   | 12 
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/Documentation/kernel-parameters.txt 
b/Documentation/kernel-parameters.txt
index 1d6f045..52aca36 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1547,6 +1547,7 @@ bytes respectively. Such letter suffixes can also be 
entirely omitted.
nosid   disable Source ID checking
no_x2apic_optout
BIOS x2APIC opt-out request will be ignored
+   nopost  disable Interrupt Posting
 
iomem=  Disable strict checking of access to MMIO memory
strict  regions from userspace.
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 2d99930..d8c3997 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -22,7 +22,7 @@ int irq_remap_broken;
 int disable_sourceid_checking;
 int no_x2apic_optout;
 
-int disable_irq_post = 1;
+int disable_irq_post = 0;
 
 static int disable_irq_remap;
 static struct irq_remap_ops *remap_ops;
@@ -58,14 +58,18 @@ static __init int setup_irqremap(char *str)
return -EINVAL;
 
while (*str) {
-   if (!strncmp(str, "on", 2))
+   if (!strncmp(str, "on", 2)) {
disable_irq_remap = 0;
-   else if (!strncmp(str, "off", 3))
+   disable_irq_post = 0;
+   } else if (!strncmp(str, "off", 3)) {
disable_irq_remap = 1;
-   else if (!strncmp(str, "nosid", 5))
+   disable_irq_post = 1;
+   } else if (!strncmp(str, "nosid", 5))
disable_sourceid_checking = 1;
else if (!strncmp(str, "no_x2apic_optout", 16))
no_x2apic_optout = 1;
+   else if (!strncmp(str, "nopost", 6))
+   disable_irq_post = 1;
 
str += strcspn(str, ",");
while (*str == ',')
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v8 12/13] KVM: Warn if 'SN' is set during posting interrupts by software

2015-09-16 Thread Feng Wu
Currently, we don't support urgent interrupts; all interrupts
are treated as non-urgent, so we cannot post interrupts when
'SN' is set.

If the vcpu is in guest mode, it cannot have been scheduled out,
and being scheduled out is currently the only case in which SN
is set, so warn if SN is set.

Signed-off-by: Feng Wu <feng...@intel.com>
Reviewed-by: Paolo Bonzini <pbonz...@redhat.com>
---
 arch/x86/kvm/vmx.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9888c43..58fbbc6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4498,6 +4498,22 @@ static inline bool 
kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_SMP
if (vcpu->mode == IN_GUEST_MODE) {
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+   /*
+* Currently, we don't support urgent interrupt,
+* all interrupts are recognized as non-urgent
+* interrupt, so we cannot post interrupts when
+* 'SN' is set.
+*
+* If the vcpu is in guest mode, it means it is
+* running instead of being scheduled out and
+* waiting in the run queue, and that's the only
+* case when 'SN' is set currently, warning if
+* 'SN' is set.
+*/
+   WARN_ON_ONCE(pi_test_sn(&vmx->pi_desc));
+
apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
POSTED_INTR_VECTOR);
return true;
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v8 00/13] Add VT-d Posted-Interrupts support

2015-09-16 Thread Feng Wu
VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
intervention when guest is running in non-root mode.

You can find the VT-d Posted-Interrupts Spec. in the following URL:
http://www.intel.com/content/www/us/en/intelligent-systems/intel-technology/vt-directed-io-spec.html

v8:
refer to the changelog in each patch

v7:
* Define two weak irq bypass callbacks:
  - kvm_arch_irq_bypass_start()
  - kvm_arch_irq_bypass_stop()
* Remove the x86 dummy implementation of the above two functions.
* Print some useful information instead of WARN_ON() when the
  irq bypass consumer unregistration fails.
* Fix an issue when calling pi_pre_block and pi_post_block.

v6:
* Rebase on 4.2.0-rc6
* Rebase on https://lkml.org/lkml/2015/8/6/526 and 
http://www.gossamer-threads.com/lists/linux/kernel/2235623
* Make the add_consumer and del_consumer callbacks static
* Remove pointless INIT_LIST_HEAD to 'vdev->ctx[vector].producer.node)'
* Use dev_info instead of WARN_ON() when irq_bypass_register_producer fails
* Remove optional dummy callbacks for irq producer

v4:
* For lowest-priority interrupt, only support single-CPU destination
interrupts at the current stage, more common lowest priority support
will be added later.
* According to Marcelo's suggestion, when vCPU is blocked, we handle
the posted-interrupts in the HLT emulation path.
* Some small changes (coding style, typo, add some code comments)

v3:
* Adjust the Posted-interrupts Descriptor updating logic when vCPU is
  preempted or blocked.
* KVM_DEV_VFIO_DEVICE_POSTING_IRQ --> KVM_DEV_VFIO_DEVICE_POST_IRQ
* __KVM_HAVE_ARCH_KVM_VFIO_POSTING --> __KVM_HAVE_ARCH_KVM_VFIO_POST
* Add KVM_DEV_VFIO_DEVICE_UNPOST_IRQ attribute for VFIO irq, which
  can be used to change back to remapping mode.
* Fix typo

v2:
* Use VFIO framework to enable this feature, the VFIO part of this series is
  based on Eric's patch "[PATCH v3 0/8] KVM-VFIO IRQ forward control"
* Rebase this patchset on 
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git,
  then revise some irq logic based on the new hierarchy irqdomain patches 
provided
  by Jiang Liu <jiang....@linux.intel.com>

Feng Wu (13):
  KVM: Extend struct pi_desc for VT-d Posted-Interrupts
  KVM: Add some helper functions for Posted-Interrupts
  KVM: Define a new interface kvm_intr_is_single_vcpu()
  KVM: Make struct kvm_irq_routing_table accessible
  KVM: make kvm_set_msi_irq() public
  vfio: Register/unregister irq_bypass_producer
  KVM: x86: Update IRTE for posted-interrupts
  KVM: Implement IRQ bypass consumer callbacks for x86
  KVM: Add an arch specific hooks in 'struct kvm_kernel_irqfd'
  KVM: Update Posted-Interrupts Descriptor when vCPU is preempted
  KVM: Update Posted-Interrupts Descriptor when vCPU is blocked
  KVM: Warn if 'SN' is set during posting interrupts by software
  iommu/vt-d: Add a command line parameter for VT-d posted-interrupts

 Documentation/kernel-parameters.txt |   1 +
 arch/x86/include/asm/kvm_host.h |  24 +++
 arch/x86/kvm/Kconfig|   1 +
 arch/x86/kvm/irq_comm.c |  99 +-
 arch/x86/kvm/lapic.c|   5 +-
 arch/x86/kvm/lapic.h|   2 +
 arch/x86/kvm/trace.h|  33 
 arch/x86/kvm/vmx.c  | 361 +++-
 arch/x86/kvm/x86.c  | 106 ++-
 drivers/iommu/irq_remapping.c   |  12 +-
 drivers/vfio/pci/Kconfig|   1 +
 drivers/vfio/pci/vfio_pci_intrs.c   |   9 +
 drivers/vfio/pci/vfio_pci_private.h |   2 +
 include/linux/kvm_host.h|  19 ++
 virt/kvm/eventfd.c  |  31 +++-
 virt/kvm/irqchip.c  |  10 -
 virt/kvm/kvm_main.c |   3 +
 17 files changed, 687 insertions(+), 32 deletions(-)

-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v8 07/13] KVM: x86: Update IRTE for posted-interrupts

2015-09-16 Thread Feng Wu
This patch adds the routine to update IRTE for posted-interrupts
when guest changes the interrupt configuration.

Signed-off-by: Feng Wu <feng...@intel.com>
---
v8:
- Move 'kvm_arch_update_pi_irte' to vmx.c as a callback
- Only update the PI irte when VM has assigned devices
- Add a trace point for VT-d posted-interrupts when we update
  or disable it for a specific irq.

 arch/x86/include/asm/kvm_host.h |  3 ++
 arch/x86/kvm/trace.h| 33 
 arch/x86/kvm/vmx.c  | 83 +
 arch/x86/kvm/x86.c  |  2 +
 4 files changed, 121 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index daa6126..8c44286 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -862,6 +862,9 @@ struct kvm_x86_ops {
   gfn_t offset, unsigned long mask);
/* pmu operations of sub-arch */
const struct kvm_pmu_ops *pmu_ops;
+
+   int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
+ uint32_t guest_irq, bool set);
 };
 
 struct kvm_arch_async_pf {
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 4eae7c3..539a9e4 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -974,6 +974,39 @@ TRACE_EVENT(kvm_enter_smm,
  __entry->smbase)
 );
 
+/*
+ * Tracepoint for VT-d posted-interrupts.
+ */
+TRACE_EVENT(kvm_pi_irte_update,
+   TP_PROTO(unsigned int vcpu_id, unsigned int gsi,
+unsigned int gvec, u64 pi_desc_addr, bool set),
+   TP_ARGS(vcpu_id, gsi, gvec, pi_desc_addr, set),
+
+   TP_STRUCT__entry(
+   __field(unsigned int,   vcpu_id )
+   __field(unsigned int,   gsi )
+   __field(unsigned int,   gvec)
+   __field(u64,pi_desc_addr)
+   __field(bool,   set )
+   ),
+
+   TP_fast_assign(
+   __entry->vcpu_id= vcpu_id;
+   __entry->gsi= gsi;
+   __entry->gvec   = gvec;
+   __entry->pi_desc_addr   = pi_desc_addr;
+   __entry->set= set;
+   ),
+
+   TP_printk("VT-d PI is %s for this irq, vcpu %u, gsi: 0x%x, "
+ "gvec: 0x%x, pi_desc_addr: 0x%llx",
+ __entry->set ? "enabled and being updated" : "disabled",
+ __entry->vcpu_id,
+ __entry->gsi,
+ __entry->gvec,
+ __entry->pi_desc_addr)
+);
+
 #endif /* _TRACE_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 316f9bf..5a25651 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -45,6 +45,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "trace.h"
 #include "pmu.h"
@@ -605,6 +606,11 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu 
*vcpu)
return container_of(vcpu, struct vcpu_vmx, vcpu);
 }
 
+struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
+{
+   return &(to_vmx(vcpu)->pi_desc);
+}
+
 #define VMCS12_OFFSET(x) offsetof(struct vmcs12, x)
 #define FIELD(number, name)[number] = VMCS12_OFFSET(name)
 #define FIELD64(number, name)  [number] = VMCS12_OFFSET(name), \
@@ -10344,6 +10350,81 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm 
*kvm,
kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
 }
 
+/*
+ * vmx_update_pi_irte - set IRTE for Posted-Interrupts
+ *
+ * @kvm: kvm
+ * @host_irq: host irq of the interrupt
+ * @guest_irq: gsi of the interrupt
+ * @set: set or unset PI
+ * returns 0 on success, < 0 on failure
+ */
+int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
+  uint32_t guest_irq, bool set)
+{
+   struct kvm_kernel_irq_routing_entry *e;
+   struct kvm_irq_routing_table *irq_rt;
+   struct kvm_lapic_irq irq;
+   struct kvm_vcpu *vcpu;
+   struct vcpu_data vcpu_info;
+   int idx, ret = -EINVAL;
+
+   if (!irq_remapping_cap(IRQ_POSTING_CAP) ||
+   (!kvm_arch_has_assigned_device(kvm)))
+   return 0;
+
+   idx = srcu_read_lock(&kvm->irq_srcu);
+   irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
+   BUG_ON(guest_irq >= irq_rt->nr_rt_entries);
+
+   hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
+   if (e->type != KVM_IRQ_ROUTING_MSI)
+   continue;
+   /*
+* VT-d PI cannot support posting multicast/broadcast
+* interrupts to a vCPU, we still use interrupt remapping
+* for these kind of interrupts.
+*
+* For lowest-priority interrupts, we on

[PATCH v8 08/13] KVM: Implement IRQ bypass consumer callbacks for x86

2015-09-16 Thread Feng Wu
Implement the following callbacks for x86:

- kvm_arch_irq_bypass_add_producer
- kvm_arch_irq_bypass_del_producer
- kvm_arch_irq_bypass_stop: dummy callback
- kvm_arch_irq_bypass_start: dummy callback

and set CONFIG_HAVE_KVM_IRQ_BYPASS for x86.

Signed-off-by: Feng Wu <feng...@intel.com>
---
v8:
- Move the weak irq bypass stop and irq bypass start to this patch.
- Call kvm_x86_ops->update_pi_irte() instead of kvm_arch_update_pi_irte().

 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/Kconfig|  1 +
 arch/x86/kvm/x86.c  | 44 +
 virt/kvm/eventfd.c  | 12 +++
 4 files changed, 58 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8c44286..0ddd353 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -24,6 +24,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index c951d44..b90776f 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -30,6 +30,7 @@ config KVM
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQFD
select IRQ_BYPASS_MANAGER
+   select HAVE_KVM_IRQ_BYPASS
select HAVE_KVM_IRQ_ROUTING
select HAVE_KVM_EVENTFD
select KVM_APIC_ARCHITECTURE
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9dcd501..79dac02 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -50,6 +50,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 
 #define CREATE_TRACE_POINTS
@@ -8249,6 +8251,48 @@ bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
 }
 EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
 
+int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
+ struct irq_bypass_producer *prod)
+{
+   struct kvm_kernel_irqfd *irqfd =
+   container_of(cons, struct kvm_kernel_irqfd, consumer);
+
+   if (kvm_x86_ops->update_pi_irte) {
+   irqfd->producer = prod;
+   return kvm_x86_ops->update_pi_irte(irqfd->kvm,
+   prod->irq, irqfd->gsi, 1);
+   }
+
+   return -EINVAL;
+}
+
+void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
+ struct irq_bypass_producer *prod)
+{
+   int ret;
+   struct kvm_kernel_irqfd *irqfd =
+   container_of(cons, struct kvm_kernel_irqfd, consumer);
+
+   if (!kvm_x86_ops->update_pi_irte) {
+   WARN_ON(irqfd->producer != NULL);
+   return;
+   }
+
+   WARN_ON(irqfd->producer != prod);
+   irqfd->producer = NULL;
+
+   /*
+* When producer of consumer is unregistered, we change back to
+* remapped mode, so we can re-use the current implementation
+* when the irq is masked/disabled or the consumer side (KVM
+* in this case) doesn't want to receive the interrupts.
+   */
+   ret = kvm_x86_ops->update_pi_irte(irqfd->kvm, prod->irq, irqfd->gsi, 0);
+   if (ret)
+   printk(KERN_INFO "irq bypass consumer (token %p) unregistration"
+  " fails: %d\n", irqfd->consumer.token, ret);
+}
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index d7a230f..c0a56a1 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -256,6 +256,18 @@ static void irqfd_update(struct kvm *kvm, struct 
kvm_kernel_irqfd *irqfd)
write_seqcount_end(&irqfd->irq_entry_sc);
 }
 
+#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+void __attribute__((weak)) kvm_arch_irq_bypass_stop(
+   struct irq_bypass_consumer *cons)
+{
+}
+
+void __attribute__((weak)) kvm_arch_irq_bypass_start(
+   struct irq_bypass_consumer *cons)
+{
+}
+#endif
+
 static int
 kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 {
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v7 16/17] KVM: Warn if 'SN' is set during posting interrupts by software

2015-08-25 Thread Feng Wu
Currently, we don't support urgent interrupts; all interrupts
are treated as non-urgent, so we cannot post interrupts when
'SN' is set.

If the vcpu is in guest mode, it cannot have been scheduled out,
and being scheduled out is currently the only case in which SN
is set, so warn if SN is set.

Signed-off-by: Feng Wu <feng...@intel.com>
---
 arch/x86/kvm/vmx.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 64e35ea..eb640a1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4494,6 +4494,22 @@ static inline bool 
kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_SMP
if (vcpu->mode == IN_GUEST_MODE) {
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+   /*
+* Currently, we don't support urgent interrupt,
+* all interrupts are recognized as non-urgent
+* interrupt, so we cannot post interrupts when
+* 'SN' is set.
+*
+* If the vcpu is in guest mode, it means it is
+* running instead of being scheduled out and
+* waiting in the run queue, and that's the only
+* case when 'SN' is set currently, warning if
+* 'SN' is set.
+*/
+   WARN_ON_ONCE(pi_test_sn(&vmx->pi_desc));
+
apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
POSTED_INTR_VECTOR);
return true;
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v7 02/17] KVM: Add some helper functions for Posted-Interrupts

2015-08-25 Thread Feng Wu
This patch adds some helper functions to manipulate the
Posted-Interrupts Descriptor.

Signed-off-by: Feng Wu <feng...@intel.com>
---
 arch/x86/kvm/vmx.c | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 271dd70..316f9bf 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -443,6 +443,8 @@ struct nested_vmx {
 };
 
 #define POSTED_INTR_ON  0
+#define POSTED_INTR_SN  1
+
 /* Posted-Interrupt Descriptor */
 struct pi_desc {
u32 pir[8]; /* Posted interrupt requested */
@@ -483,6 +485,30 @@ static int pi_test_and_set_pir(int vector, struct pi_desc 
*pi_desc)
return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
 }
 
+static void pi_clear_sn(struct pi_desc *pi_desc)
+{
+   return clear_bit(POSTED_INTR_SN,
+   (unsigned long *)&pi_desc->control);
+}
+
+static void pi_set_sn(struct pi_desc *pi_desc)
+{
+   return set_bit(POSTED_INTR_SN,
+   (unsigned long *)&pi_desc->control);
+}
+
+static int pi_test_on(struct pi_desc *pi_desc)
+{
+   return test_bit(POSTED_INTR_ON,
+   (unsigned long *)&pi_desc->control);
+}
+
+static int pi_test_sn(struct pi_desc *pi_desc)
+{
+   return test_bit(POSTED_INTR_SN,
+   (unsigned long *)&pi_desc->control);
+}
+
 struct vcpu_vmx {
struct kvm_vcpu   vcpu;
unsigned long host_rsp;
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v7 05/17] KVM: Add interfaces to control PI outside vmx

2015-08-25 Thread Feng Wu
This patch adds pi_clear_sn and pi_set_sn to struct kvm_x86_ops,
so we can set/clear SN outside vmx.

Signed-off-by: Feng Wu <feng...@intel.com>
---
 arch/x86/include/asm/kvm_host.h |  3 +++
 arch/x86/kvm/vmx.c  | 13 +
 2 files changed, 16 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d50c1d3..c4f99f1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -860,6 +860,9 @@ struct kvm_x86_ops {
   gfn_t offset, unsigned long mask);
 
u64 (*get_pi_desc_addr)(struct kvm_vcpu *vcpu);
+
+   void (*pi_clear_sn)(struct kvm_vcpu *vcpu);
+   void (*pi_set_sn)(struct kvm_vcpu *vcpu);
/* pmu operations of sub-arch */
const struct kvm_pmu_ops *pmu_ops;
 };
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 81a995c..234f720 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -615,6 +615,16 @@ struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
return &(to_vmx(vcpu)->pi_desc);
 }
 
+static void vmx_pi_clear_sn(struct kvm_vcpu *vcpu)
+{
+   pi_clear_sn(vcpu_to_pi_desc(vcpu));
+}
+
+static void vmx_pi_set_sn(struct kvm_vcpu *vcpu)
+{
+   pi_set_sn(vcpu_to_pi_desc(vcpu));
+}
+
 static unsigned long shadow_read_only_fields[] = {
/*
 * We do NOT shadow fields that are modified when L0
@@ -10471,6 +10481,9 @@ static struct kvm_x86_ops vmx_x86_ops = {
 
.get_pi_desc_addr = vmx_get_pi_desc_addr,
 
+   .pi_clear_sn = vmx_pi_clear_sn,
+   .pi_set_sn = vmx_pi_set_sn,
+
.pmu_ops = &intel_pmu_ops,
 };
 
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v7 00/17] Add VT-d Posted-Interrupts support

2015-08-25 Thread Feng Wu
VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
intervention when guest is running in non-root mode.

You can find the VT-d Posted-Interrupts Spec. in the following URL:
http://www.intel.com/content/www/us/en/intelligent-systems/intel-technology/vt-directed-io-spec.html

v7:
* Define two weak irq bypass callbacks:
  - kvm_arch_irq_bypass_start()
  - kvm_arch_irq_bypass_stop()
* Remove the x86 dummy implementation of the above two functions.
* Print some useful information instead of WARN_ON() when the
  irq bypass consumer unregistration fails.
* Fix an issue when calling pi_pre_block and pi_post_block.

v6:
* Rebase on 4.2.0-rc6
* Rebase on https://lkml.org/lkml/2015/8/6/526 and 
http://www.gossamer-threads.com/lists/linux/kernel/2235623
* Make the add_consumer and del_consumer callbacks static
* Remove pointless INIT_LIST_HEAD to 'vdev->ctx[vector].producer.node)'
* Use dev_info instead of WARN_ON() when irq_bypass_register_producer fails
* Remove optional dummy callbacks for irq producer

v4:
* For lowest-priority interrupt, only support single-CPU destination
interrupts at the current stage, more common lowest priority support
will be added later.
* According to Marcelo's suggestion, when vCPU is blocked, we handle
the posted-interrupts in the HLT emulation path.
* Some small changes (coding style, typo, add some code comments)

v3:
* Adjust the Posted-interrupts Descriptor updating logic when vCPU is
  preempted or blocked.
* KVM_DEV_VFIO_DEVICE_POSTING_IRQ --> KVM_DEV_VFIO_DEVICE_POST_IRQ
* __KVM_HAVE_ARCH_KVM_VFIO_POSTING --> __KVM_HAVE_ARCH_KVM_VFIO_POST
* Add KVM_DEV_VFIO_DEVICE_UNPOST_IRQ attribute for VFIO irq, which
  can be used to change back to remapping mode.
* Fix typo

v2:
* Use VFIO framework to enable this feature, the VFIO part of this series is
  based on Eric's patch "[PATCH v3 0/8] KVM-VFIO IRQ forward control"
* Rebase this patchset on 
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git,
  then revise some irq logic based on the new hierarchy irqdomain patches 
provided
  by Jiang Liu <jiang....@linux.intel.com>

Feng Wu (17):
  KVM: Extend struct pi_desc for VT-d Posted-Interrupts
  KVM: Add some helper functions for Posted-Interrupts
  KVM: Define a new interface kvm_intr_is_single_vcpu()
  KVM: Get Posted-Interrupts descriptor address from 'struct kvm_vcpu'
  KVM: Add interfaces to control PI outside vmx
  KVM: Make struct kvm_irq_routing_table accessible
  KVM: make kvm_set_msi_irq() public
  vfio: Select IRQ_BYPASS_MANAGER for vfio PCI devices
  vfio: Register/unregister irq_bypass_producer
  KVM: x86: Update IRTE for posted-interrupts
  KVM: Define two weak arch callbacks for irq bypass manager
  KVM: Implement IRQ bypass consumer callbacks for x86
  KVM: Add an arch specific hooks in 'struct kvm_kernel_irqfd'
  KVM: Update Posted-Interrupts Descriptor when vCPU is preempted
  KVM: Update Posted-Interrupts Descriptor when vCPU is blocked
  KVM: Warn if 'SN' is set during posting interrupts by software
  iommu/vt-d: Add a command line parameter for VT-d posted-interrupts

 Documentation/kernel-parameters.txt |   1 +
 arch/x86/include/asm/kvm_host.h |  20 +++
 arch/x86/kvm/Kconfig|   1 +
 arch/x86/kvm/irq_comm.c |  28 +++-
 arch/x86/kvm/vmx.c  | 288 +++-
 arch/x86/kvm/x86.c  | 167 +++--
 drivers/iommu/irq_remapping.c   |  12 +-
 drivers/vfio/pci/Kconfig|   1 +
 drivers/vfio/pci/vfio_pci_intrs.c   |   9 ++
 drivers/vfio/pci/vfio_pci_private.h |   2 +
 include/linux/kvm_host.h|  28 
 include/linux/kvm_irqfd.h   |   2 +
 virt/kvm/eventfd.c  |  22 ++-
 virt/kvm/irqchip.c  |  10 --
 virt/kvm/kvm_main.c |   3 +
 15 files changed, 565 insertions(+), 29 deletions(-)

-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v7 06/17] KVM: Make struct kvm_irq_routing_table accessible

2015-08-25 Thread Feng Wu
Move struct kvm_irq_routing_table from irqchip.c to kvm_host.h,
so we can use it outside of irqchip.c.

Signed-off-by: Feng Wu <feng...@intel.com>
---
 include/linux/kvm_host.h | 14 ++
 virt/kvm/irqchip.c   | 10 --
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5ac8d21..5f183fb 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -328,6 +328,20 @@ struct kvm_kernel_irq_routing_entry {
struct hlist_node link;
 };
 
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+
+struct kvm_irq_routing_table {
+   int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
+   u32 nr_rt_entries;
+   /*
+* Array indexed by gsi. Each entry contains list of irq chips
+* the gsi is connected to.
+*/
+   struct hlist_head map[0];
+};
+
+#endif
+
 #ifndef KVM_PRIVATE_MEM_SLOTS
 #define KVM_PRIVATE_MEM_SLOTS 0
 #endif
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 21c1424..2cf45d3 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -31,16 +31,6 @@
 #include trace/events/kvm.h
 #include irq.h
 
-struct kvm_irq_routing_table {
-   int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
-   u32 nr_rt_entries;
-   /*
-* Array indexed by gsi. Each entry contains list of irq chips
-* the gsi is connected to.
-*/
-   struct hlist_head map[0];
-};
-
 int kvm_irq_map_gsi(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *entries, int gsi)
 {
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v7 03/17] KVM: Define a new interface kvm_intr_is_single_vcpu()

2015-08-25 Thread Feng Wu
This patch defines a new interface kvm_intr_is_single_vcpu(),
which returns whether the interrupt is for a single CPU or not.

It is used by VT-d PI, since for now we only support single-CPU
interrupts. For lowest-priority interrupts, if the user configures
it via /proc/irq or uses irqbalance to make it single-CPU, we
can use PI to deliver the interrupts to it. Full functionality
of lowest-priority support will be added later.

Signed-off-by: Feng Wu <feng...@intel.com>
---
 arch/x86/include/asm/kvm_host.h |  3 +++
 arch/x86/kvm/irq_comm.c | 24 
 2 files changed, 27 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 49ec903..af11bca 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1204,4 +1204,7 @@ int __x86_set_memory_region(struct kvm *kvm,
 int x86_set_memory_region(struct kvm *kvm,
  const struct kvm_userspace_memory_region *mem);
 
+bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
+struct kvm_vcpu **dest_vcpu);
+
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 9efff9e..a9572a13 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -297,6 +297,30 @@ out:
return r;
 }
 
+bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
+struct kvm_vcpu **dest_vcpu)
+{
+   int i, r = 0;
+   struct kvm_vcpu *vcpu;
+
+   kvm_for_each_vcpu(i, vcpu, kvm) {
+   if (!kvm_apic_present(vcpu))
+   continue;
+
+   if (!kvm_apic_match_dest(vcpu, NULL, irq-shorthand,
+   irq-dest_id, irq-dest_mode))
+   continue;
+
+   r++;
+   *dest_vcpu = vcpu;
+   }
+
+   if (r == 1)
+   return true;
+   else
+   return false;
+}
+
 #define IOAPIC_ROUTING_ENTRY(irq) \
{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,  \
  .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } }
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v7 09/17] vfio: Register/unregister irq_bypass_producer

2015-08-25 Thread Feng Wu
This patch adds the registration/unregistration of an
irq_bypass_producer for MSI/MSIx on vfio pci devices.

v6:
- Make the add_consumer and del_consumer callbacks static
- Remove pointless INIT_LIST_HEAD of '&vdev->ctx[vector].producer.node'
- Use dev_info instead of WARN_ON() when irq_bypass_register_producer fails
- Remove optional dummy callbacks for irq producer

Signed-off-by: Feng Wu <feng...@intel.com>
---
 drivers/vfio/pci/vfio_pci_intrs.c   | 9 +
 drivers/vfio/pci/vfio_pci_private.h | 2 ++
 2 files changed, 11 insertions(+)

diff --git a/drivers/vfio/pci/vfio_pci_intrs.c 
b/drivers/vfio/pci/vfio_pci_intrs.c
index 1f577b4..c65299d 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -319,6 +319,7 @@ static int vfio_msi_set_vector_signal(struct 
vfio_pci_device *vdev,
 
if (vdev-ctx[vector].trigger) {
free_irq(irq, vdev-ctx[vector].trigger);
+   irq_bypass_unregister_producer(vdev-ctx[vector].producer);
kfree(vdev-ctx[vector].name);
eventfd_ctx_put(vdev-ctx[vector].trigger);
vdev-ctx[vector].trigger = NULL;
@@ -360,6 +361,14 @@ static int vfio_msi_set_vector_signal(struct 
vfio_pci_device *vdev,
return ret;
}
 
+   vdev-ctx[vector].producer.token = trigger;
+   vdev-ctx[vector].producer.irq = irq;
+   ret = irq_bypass_register_producer(vdev-ctx[vector].producer);
+   if (unlikely(ret))
+   dev_info(pdev-dev,
+   irq bypass producer (token %p) registeration fails: %d\n,
+   vdev-ctx[vector].producer.token, ret);
+
vdev-ctx[vector].trigger = trigger;
 
return 0;
diff --git a/drivers/vfio/pci/vfio_pci_private.h 
b/drivers/vfio/pci/vfio_pci_private.h
index ae0e1b4..0e7394f 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -13,6 +13,7 @@
 
 #include linux/mutex.h
 #include linux/pci.h
+#include linux/irqbypass.h
 
 #ifndef VFIO_PCI_PRIVATE_H
 #define VFIO_PCI_PRIVATE_H
@@ -29,6 +30,7 @@ struct vfio_pci_irq_ctx {
struct virqfd   *mask;
char*name;
boolmasked;
+   struct irq_bypass_producer  producer;
 };
 
 struct vfio_pci_device {
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v7 13/17] KVM: Add an arch specific hooks in 'struct kvm_kernel_irqfd'

2015-08-25 Thread Feng Wu
This patch adds an arch-specific hook 'arch_update' in
'struct kvm_kernel_irqfd'. On the Intel side, it is used to
update the IRTE when VT-d posted-interrupts are used.

Signed-off-by: Feng Wu <feng...@intel.com>
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/x86.c  |  5 +
 include/linux/kvm_host.h| 11 +++
 include/linux/kvm_irqfd.h   |  2 ++
 virt/kvm/eventfd.c  | 12 +++-
 5 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3038c1b..22269b4 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -176,6 +176,8 @@ enum {
  */
 #define KVM_APIC_PV_EOI_PENDING1
 
+#define __KVM_HAVE_ARCH_IRQFD_INIT 1
+
 struct kvm_kernel_irq_routing_entry;
 
 /*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index be4b561..ef93fdc 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8355,6 +8355,11 @@ void kvm_arch_irq_bypass_del_producer(struct 
irq_bypass_consumer *cons,
fails: %d\n, irqfd-consumer.token, ret);
 }
 
+void kvm_arch_irqfd_init(struct kvm_kernel_irqfd *irqfd)
+{
+   irqfd-arch_update = kvm_arch_update_pi_irte;
+}
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5f183fb..f4005dc 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -34,6 +34,8 @@
 
 #include asm/kvm_host.h
 
+struct kvm_kernel_irqfd;
+
 /*
  * The bit 16 ~ bit 31 of kvm_memory_region::flags are internally used
  * in kvm, other bits are visible for userspace which are defined in
@@ -1145,6 +1147,15 @@ extern struct kvm_device_ops kvm_xics_ops;
 extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
 extern struct kvm_device_ops kvm_arm_vgic_v3_ops;
 
+#ifdef __KVM_HAVE_ARCH_IRQFD_INIT
+void kvm_arch_irqfd_init(struct kvm_kernel_irqfd *irqfd);
+#else
+static inline void kvm_arch_irqfd_init(struct kvm_kernel_irqfd *irqfd)
+{
+   irqfd-arch_update = NULL;
+}
+#endif
+
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 
 static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h
index 0c1de05..b7aab52 100644
--- a/include/linux/kvm_irqfd.h
+++ b/include/linux/kvm_irqfd.h
@@ -66,6 +66,8 @@ struct kvm_kernel_irqfd {
struct work_struct shutdown;
struct irq_bypass_consumer consumer;
struct irq_bypass_producer *producer;
+   int (*arch_update)(struct kvm *kvm, unsigned int host_irq,
+  uint32_t guest_irq, bool set);
 };
 
 #endif /* __LINUX_KVM_IRQFD_H */
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index f3050b9..b2d9066 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -288,6 +288,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
INIT_LIST_HEAD(irqfd-list);
INIT_WORK(irqfd-inject, irqfd_inject);
INIT_WORK(irqfd-shutdown, irqfd_shutdown);
+   kvm_arch_irqfd_init(irqfd);
seqcount_init(irqfd-irq_entry_sc);
 
f = fdget(args-fd);
@@ -580,13 +581,22 @@ kvm_irqfd_release(struct kvm *kvm)
  */
 void kvm_irq_routing_update(struct kvm *kvm)
 {
+   int ret;
struct kvm_kernel_irqfd *irqfd;
 
spin_lock_irq(kvm-irqfds.lock);
 
-   list_for_each_entry(irqfd, kvm-irqfds.items, list)
+   list_for_each_entry(irqfd, kvm-irqfds.items, list) {
irqfd_update(kvm, irqfd);
 
+   if (irqfd-arch_update  irqfd-producer) {
+   ret = irqfd-arch_update(
+   irqfd-kvm, irqfd-producer-irq,
+   irqfd-gsi, 1);
+   WARN_ON(ret);
+   }
+   }
+
spin_unlock_irq(kvm-irqfds.lock);
 }
 
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v7 11/17] KVM: Define two weak arch callbacks for irq bypass manager

2015-08-25 Thread Feng Wu
Define two weak arch callbacks so that archs that don't need
them don't need to define them.

Signed-off-by: Feng Wu <feng...@intel.com>
---
 virt/kvm/eventfd.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index d7a230f..f3050b9 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -256,6 +256,16 @@ static void irqfd_update(struct kvm *kvm, struct 
kvm_kernel_irqfd *irqfd)
write_seqcount_end(irqfd-irq_entry_sc);
 }
 
+void __attribute__((weak)) kvm_arch_irq_bypass_stop(
+   struct irq_bypass_consumer *cons)
+{
+}
+
+void __attribute__((weak)) kvm_arch_irq_bypass_start(
+   struct irq_bypass_consumer *cons)
+{
+}
+
 static int
 kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 {
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v7 10/17] KVM: x86: Update IRTE for posted-interrupts

2015-08-25 Thread Feng Wu
This patch adds the routine to update IRTE for posted-interrupts
when guest changes the interrupt configuration.

Signed-off-by: Feng Wu <feng...@intel.com>
---
 arch/x86/kvm/x86.c | 73 ++
 1 file changed, 73 insertions(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5ef2560..8f09a76 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -63,6 +63,7 @@
 #include asm/fpu/internal.h /* Ugh! */
 #include asm/pvclock.h
 #include asm/div64.h
+#include asm/irq_remapping.h
 
 #define MAX_IO_MSRS 256
 #define KVM_MAX_MCE_BANKS 32
@@ -8248,6 +8249,78 @@ bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
 }
 EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
 
+/*
+ * kvm_arch_update_pi_irte - set IRTE for Posted-Interrupts
+ *
+ * @kvm: kvm
+ * @host_irq: host irq of the interrupt
+ * @guest_irq: gsi of the interrupt
+ * @set: set or unset PI
+ * returns 0 on success, < 0 on failure
+ */
+int kvm_arch_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
+   uint32_t guest_irq, bool set)
+{
+   struct kvm_kernel_irq_routing_entry *e;
+   struct kvm_irq_routing_table *irq_rt;
+   struct kvm_lapic_irq irq;
+   struct kvm_vcpu *vcpu;
+   struct vcpu_data vcpu_info;
+   int idx, ret = -EINVAL;
+
+   if (!irq_remapping_cap(IRQ_POSTING_CAP))
+   return 0;
+
+   idx = srcu_read_lock(kvm-irq_srcu);
+   irq_rt = srcu_dereference(kvm-irq_routing, kvm-irq_srcu);
+   BUG_ON(guest_irq = irq_rt-nr_rt_entries);
+
+   hlist_for_each_entry(e, irq_rt-map[guest_irq], link) {
+   if (e-type != KVM_IRQ_ROUTING_MSI)
+   continue;
+   /*
+* VT-d PI cannot support posting multicast/broadcast
+* interrupts to a VCPU, we still use interrupt remapping
+* for these kind of interrupts.
+*
+* For lowest-priority interrupts, we only support
+* those with single CPU as the destination, e.g. user
+* configures the interrupts via /proc/irq or uses
+* irqbalance to make the interrupts single-CPU.
+*
+* We will support full lowest-priority interrupt later.
+*
+*/
+
+   kvm_set_msi_irq(e, irq);
+   if (!kvm_intr_is_single_vcpu(kvm, irq, vcpu))
+   continue;
+
+   vcpu_info.pi_desc_addr = kvm_x86_ops-get_pi_desc_addr(vcpu);
+   vcpu_info.vector = irq.vector;
+
+   if (set)
+   ret = irq_set_vcpu_affinity(host_irq, vcpu_info);
+   else {
+   /* suppress notification event before unposting */
+   kvm_x86_ops-pi_set_sn(vcpu);
+   ret = irq_set_vcpu_affinity(host_irq, NULL);
+   kvm_x86_ops-pi_clear_sn(vcpu);
+   }
+
+   if (ret  0) {
+   printk(KERN_INFO %s: failed to update PI IRTE\n,
+   __func__);
+   goto out;
+   }
+   }
+
+   ret = 0;
+out:
+   srcu_read_unlock(kvm-irq_srcu, idx);
+   return ret;
+}
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v7 08/17] vfio: Select IRQ_BYPASS_MANAGER for vfio PCI devices

2015-08-25 Thread Feng Wu
Enable irq bypass manager for vfio PCI devices.

Signed-off-by: Feng Wu <feng...@intel.com>
---
 drivers/vfio/pci/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
index 579d83b..02912f1 100644
--- a/drivers/vfio/pci/Kconfig
+++ b/drivers/vfio/pci/Kconfig
@@ -2,6 +2,7 @@ config VFIO_PCI
tristate VFIO support for PCI devices
depends on VFIO  PCI  EVENTFD
select VFIO_VIRQFD
+   select IRQ_BYPASS_MANAGER
help
  Support for the PCI VFIO bus driver.  This is required to make
  use of PCI drivers using the VFIO framework.
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v7 12/17] KVM: Implement IRQ bypass consumer callbacks for x86

2015-08-25 Thread Feng Wu
Implement the following callbacks for x86:

- kvm_arch_irq_bypass_add_producer
- kvm_arch_irq_bypass_del_producer
- kvm_arch_irq_bypass_stop: dummy callback
- kvm_arch_irq_bypass_resume: dummy callback

and set CONFIG_HAVE_KVM_IRQ_BYPASS for x86.

Signed-off-by: Feng Wu <feng...@intel.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/Kconfig|  1 +
 arch/x86/kvm/x86.c  | 34 ++
 3 files changed, 36 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 82d0709..3038c1b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -24,6 +24,7 @@
 #include linux/perf_event.h
 #include linux/pvclock_gtod.h
 #include linux/clocksource.h
+#include linux/irqbypass.h
 
 #include asm/pvclock-abi.h
 #include asm/desc.h
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index c951d44..b90776f 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -30,6 +30,7 @@ config KVM
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQFD
select IRQ_BYPASS_MANAGER
+   select HAVE_KVM_IRQ_BYPASS
select HAVE_KVM_IRQ_ROUTING
select HAVE_KVM_EVENTFD
select KVM_APIC_ARCHITECTURE
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8f09a76..be4b561 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -50,6 +50,8 @@
 #include linux/pci.h
 #include linux/timekeeper_internal.h
 #include linux/pvclock_gtod.h
+#include linux/kvm_irqfd.h
+#include linux/irqbypass.h
 #include trace/events/kvm.h
 
 #define CREATE_TRACE_POINTS
@@ -8321,6 +8323,38 @@ out:
return ret;
 }
 
+int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
+ struct irq_bypass_producer *prod)
+{
+   struct kvm_kernel_irqfd *irqfd =
+   container_of(cons, struct kvm_kernel_irqfd, consumer);
+
+   irqfd-producer = prod;
+
+   return kvm_arch_update_pi_irte(irqfd-kvm, prod-irq, irqfd-gsi, 1);
+}
+
+void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
+ struct irq_bypass_producer *prod)
+{
+   int ret;
+   struct kvm_kernel_irqfd *irqfd =
+   container_of(cons, struct kvm_kernel_irqfd, consumer);
+
+   irqfd-producer = NULL;
+
+   /*
+* When producer of consumer is unregistered, we change back to
+* remapped mode, so we can re-use the current implementation
+* when the irq is masked/disabled or the consumer side (KVM
+* in this case) doesn't want to receive the interrupts.
+   */
+   ret = kvm_arch_update_pi_irte(irqfd-kvm, prod-irq, irqfd-gsi, 0);
+   if (ret)
+   printk(KERN_INFO irq bypass consumer (token %p) unregistration
+   fails: %d\n, irqfd-consumer.token, ret);
+}
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v7 14/17] KVM: Update Posted-Interrupts Descriptor when vCPU is preempted

2015-08-25 Thread Feng Wu
This patch updates the Posted-Interrupts Descriptor when vCPU
is preempted.

sched out:
- Set 'SN' to suppress future non-urgent interrupts posted for
the vCPU.

sched in:
- Clear 'SN'
- Change NDST if vCPU is scheduled to a different CPU
- Set 'NV' to POSTED_INTR_VECTOR

Signed-off-by: Feng Wu <feng...@intel.com>
---
 arch/x86/kvm/vmx.c | 51 +++
 1 file changed, 51 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 234f720..9c87064 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -45,6 +45,7 @@
 #include asm/debugreg.h
 #include asm/kexec.h
 #include asm/apic.h
+#include asm/irq_remapping.h
 
 #include trace.h
 #include pmu.h
@@ -2001,10 +2002,60 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int 
cpu)
vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
vmx-loaded_vmcs-cpu = cpu;
}
+
+   if (irq_remapping_cap(IRQ_POSTING_CAP)) {
+   struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+   struct pi_desc old, new;
+   unsigned int dest;
+
+   do {
+   old.control = new.control = pi_desc-control;
+
+   /*
+* If 'nv' field is POSTED_INTR_WAKEUP_VECTOR, there
+* are two possible cases:
+* 1. After running 'pi_pre_block', context switch
+*happened. For this case, 'sn' was set in
+*vmx_vcpu_put(), so we need to clear it here.
+* 2. After running 'pi_pre_block', we were blocked,
+*and woken up by some other guy. For this case,
+*we don't need to do anything, 'pi_post_block'
+*will do everything for us. However, we cannot
+*check whether it is case #1 or case #2 here
+*(maybe, not needed), so we also clear sn here,
+*I think it is not a big deal.
+*/
+   if (pi_desc-nv != POSTED_INTR_WAKEUP_VECTOR) {
+   if (vcpu-cpu != cpu) {
+   dest = cpu_physical_id(cpu);
+
+   if (x2apic_enabled())
+   new.ndst = dest;
+   else
+   new.ndst = (dest  8)  0xFF00;
+   }
+
+   /* set 'NV' to 'notification vector' */
+   new.nv = POSTED_INTR_VECTOR;
+   }
+
+   /* Allow posting non-urgent interrupts */
+   new.sn = 0;
+   } while (cmpxchg(pi_desc-control, old.control,
+   new.control) != old.control);
+   }
 }
 
 static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
 {
+   if (irq_remapping_cap(IRQ_POSTING_CAP)) {
+   struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+   /* Set SN when the vCPU is preempted */
+   if (vcpu-preempted)
+   pi_set_sn(pi_desc);
+   }
+
__vmx_load_host_state(to_vmx(vcpu));
if (!vmm_exclusive) {
__loaded_vmcs_clear(to_vmx(vcpu)-loaded_vmcs);
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v7 01/17] KVM: Extend struct pi_desc for VT-d Posted-Interrupts

2015-08-25 Thread Feng Wu
Extend struct pi_desc for VT-d Posted-Interrupts.

Signed-off-by: Feng Wu <feng...@intel.com>
---
 arch/x86/kvm/vmx.c | 20 ++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 83b7b5c..271dd70 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -446,8 +446,24 @@ struct nested_vmx {
 /* Posted-Interrupt Descriptor */
 struct pi_desc {
u32 pir[8]; /* Posted interrupt requested */
-   u32 control;/* bit 0 of control is outstanding notification bit */
-   u32 rsvd[7];
+   union {
+   struct {
+   /* bit 256 - Outstanding Notification */
+   u16 on  : 1,
+   /* bit 257 - Suppress Notification */
+   sn  : 1,
+   /* bit 271:258 - Reserved */
+   rsvd_1  : 14;
+   /* bit 279:272 - Notification Vector */
+   u8  nv;
+   /* bit 287:280 - Reserved */
+   u8  rsvd_2;
+   /* bit 319:288 - Notification Destination */
+   u32 ndst;
+   };
+   u64 control;
+   };
+   u32 rsvd[6];
 } __aligned(64);
 
 static bool pi_test_and_set_on(struct pi_desc *pi_desc)
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v7 17/17] iommu/vt-d: Add a command line parameter for VT-d posted-interrupts

2015-08-25 Thread Feng Wu
Enable VT-d Posted-Interrupts and add a command line
parameter for it.

Signed-off-by: Feng Wu <feng...@intel.com>
---
 Documentation/kernel-parameters.txt |  1 +
 drivers/iommu/irq_remapping.c   | 12 
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/Documentation/kernel-parameters.txt 
b/Documentation/kernel-parameters.txt
index 1d6f045..52aca36 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1547,6 +1547,7 @@ bytes respectively. Such letter suffixes can also be 
entirely omitted.
nosid   disable Source ID checking
no_x2apic_optout
BIOS x2APIC opt-out request will be ignored
+   nopost  disable Interrupt Posting
 
iomem=  Disable strict checking of access to MMIO memory
strict  regions from userspace.
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 2d99930..d8c3997 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -22,7 +22,7 @@ int irq_remap_broken;
 int disable_sourceid_checking;
 int no_x2apic_optout;
 
-int disable_irq_post = 1;
+int disable_irq_post = 0;
 
 static int disable_irq_remap;
 static struct irq_remap_ops *remap_ops;
@@ -58,14 +58,18 @@ static __init int setup_irqremap(char *str)
return -EINVAL;
 
while (*str) {
-   if (!strncmp(str, on, 2))
+   if (!strncmp(str, on, 2)) {
disable_irq_remap = 0;
-   else if (!strncmp(str, off, 3))
+   disable_irq_post = 0;
+   } else if (!strncmp(str, off, 3)) {
disable_irq_remap = 1;
-   else if (!strncmp(str, nosid, 5))
+   disable_irq_post = 1;
+   } else if (!strncmp(str, nosid, 5))
disable_sourceid_checking = 1;
else if (!strncmp(str, no_x2apic_optout, 16))
no_x2apic_optout = 1;
+   else if (!strncmp(str, nopost, 6))
+   disable_irq_post = 1;
 
str += strcspn(str, ,);
while (*str == ',')
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v7 15/17] KVM: Update Posted-Interrupts Descriptor when vCPU is blocked

2015-08-25 Thread Feng Wu
This patch updates the Posted-Interrupts Descriptor when vCPU
is blocked.

pre-block:
- Add the vCPU to the blocked per-CPU list
- Set 'NV' to POSTED_INTR_WAKEUP_VECTOR

post-block:
- Remove the vCPU from the per-CPU list

Signed-off-by: Feng Wu <feng...@intel.com>
---
 arch/x86/include/asm/kvm_host.h |   5 ++
 arch/x86/kvm/vmx.c  | 151 
 arch/x86/kvm/x86.c  |  55 ---
 include/linux/kvm_host.h|   3 +
 virt/kvm/kvm_main.c |   3 +
 5 files changed, 207 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 22269b4..32af275 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -554,6 +554,8 @@ struct kvm_vcpu_arch {
 */
bool write_fault_to_shadow_pgtable;
 
+   bool halted;
+
/* set at EPT violation at this point */
unsigned long exit_qualification;
 
@@ -868,6 +870,9 @@ struct kvm_x86_ops {
 
void (*pi_clear_sn)(struct kvm_vcpu *vcpu);
void (*pi_set_sn)(struct kvm_vcpu *vcpu);
+
+   int (*pi_pre_block)(struct kvm_vcpu *vcpu);
+   void (*pi_post_block)(struct kvm_vcpu *vcpu);
/* pmu operations of sub-arch */
const struct kvm_pmu_ops *pmu_ops;
 };
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9c87064..64e35ea 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -888,6 +888,13 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
 static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
 static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
 
+/*
+ * We maintain a per-CPU linked list of vCPUs, so in wakeup_handler() we
+ * can find which vCPU should be woken up.
+ */
+static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
+static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
+
 static unsigned long *vmx_io_bitmap_a;
 static unsigned long *vmx_io_bitmap_b;
 static unsigned long *vmx_msr_bitmap_legacy;
@@ -2981,6 +2988,8 @@ static int hardware_enable(void)
return -EBUSY;
 
INIT_LIST_HEAD(per_cpu(loaded_vmcss_on_cpu, cpu));
+   INIT_LIST_HEAD(per_cpu(blocked_vcpu_on_cpu, cpu));
+   spin_lock_init(per_cpu(blocked_vcpu_on_cpu_lock, cpu));
 
/*
 * Now we can enable the vmclear operation in kdump
@@ -6106,6 +6115,25 @@ static void update_ple_window_actual_max(void)
ple_window_grow, INT_MIN);
 }
 
+/*
+ * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
+ */
+static void wakeup_handler(void)
+{
+   struct kvm_vcpu *vcpu;
+   int cpu = smp_processor_id();
+
+   spin_lock(per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+   list_for_each_entry(vcpu, per_cpu(blocked_vcpu_on_cpu, cpu),
+   blocked_vcpu_list) {
+   struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+   if (pi_test_on(pi_desc) == 1)
+   kvm_vcpu_kick(vcpu);
+   }
+   spin_unlock(per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+}
+
 static __init int hardware_setup(void)
 {
int r = -ENOMEM, i, msr;
@@ -6290,6 +6318,8 @@ static __init int hardware_setup(void)
kvm_x86_ops-enable_log_dirty_pt_masked = NULL;
}
 
+   kvm_set_posted_intr_wakeup_handler(wakeup_handler);
+
return alloc_kvm_area();
 
 out8:
@@ -10414,6 +10444,124 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm 
*kvm,
kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
 }
 
+/*
+ * This routine does the following things for vCPU which is going
+ * to be blocked if VT-d PI is enabled.
+ * - Store the vCPU to the wakeup list, so when interrupts happen
+ *   we can find the right vCPU to wake up.
+ * - Change the Posted-interrupt descriptor as below:
+ *  'NDST' <-- vcpu->pre_pcpu
+ *  'NV' <-- POSTED_INTR_WAKEUP_VECTOR
+ * - If 'ON' is set during this process, which means at least one
+ *   interrupt is posted for this vCPU, we cannot block it, in
+ *   this case, return 1, otherwise, return 0.
+ *
+ */
+static int vmx_pi_pre_block(struct kvm_vcpu *vcpu)
+{
+   unsigned long flags;
+   unsigned int dest;
+   struct pi_desc old, new;
+   struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+   if (!irq_remapping_cap(IRQ_POSTING_CAP))
+   return 0;
+
+   vcpu-pre_pcpu = vcpu-cpu;
+   spin_lock_irqsave(per_cpu(blocked_vcpu_on_cpu_lock,
+ vcpu-pre_pcpu), flags);
+   list_add_tail(vcpu-blocked_vcpu_list,
+ per_cpu(blocked_vcpu_on_cpu,
+ vcpu-pre_pcpu));
+   spin_unlock_irqrestore(per_cpu(blocked_vcpu_on_cpu_lock,
+  vcpu-pre_pcpu), flags);
+
+   do {
+   old.control = new.control = pi_desc-control;
+
+   /*
+* We should not block the vCPU if
+* an interrupt is posted

[PATCH v7 04/17] KVM: Get Posted-Interrupts descriptor address from 'struct kvm_vcpu'

2015-08-25 Thread Feng Wu
Define an interface to get PI descriptor address from the vCPU structure.

Signed-off-by: Feng Wu <feng...@intel.com>
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/vmx.c  | 11 +++
 2 files changed, 13 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index af11bca..d50c1d3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -858,6 +858,8 @@ struct kvm_x86_ops {
void (*enable_log_dirty_pt_masked)(struct kvm *kvm,
   struct kvm_memory_slot *slot,
   gfn_t offset, unsigned long mask);
+
+   u64 (*get_pi_desc_addr)(struct kvm_vcpu *vcpu);
/* pmu operations of sub-arch */
const struct kvm_pmu_ops *pmu_ops;
 };
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 316f9bf..81a995c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -610,6 +610,10 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu 
*vcpu)
 #define FIELD64(number, name)  [number] = VMCS12_OFFSET(name), \
[number##_HIGH] = VMCS12_OFFSET(name)+4
 
+struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
+{
+   return (to_vmx(vcpu)-pi_desc);
+}
 
 static unsigned long shadow_read_only_fields[] = {
/*
@@ -4487,6 +4491,11 @@ static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu 
*vcpu)
return;
 }
 
+static u64 vmx_get_pi_desc_addr(struct kvm_vcpu *vcpu)
+{
+   return __pa((u64)vcpu_to_pi_desc(vcpu));
+}
+
 /*
  * Set up the vmcs's constant host-state fields, i.e., host-state fields that
  * will not change in the lifetime of the guest.
@@ -10460,6 +10469,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
.flush_log_dirty = vmx_flush_log_dirty,
.enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
 
+   .get_pi_desc_addr = vmx_get_pi_desc_addr,
+
.pmu_ops = intel_pmu_ops,
 };
 
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v7 07/17] KVM: make kvm_set_msi_irq() public

2015-08-25 Thread Feng Wu
Make kvm_set_msi_irq() public, so we can use this function outside irq_comm.c.

Signed-off-by: Feng Wu <feng...@intel.com>
---
 arch/x86/include/asm/kvm_host.h | 4 
 arch/x86/kvm/irq_comm.c | 4 ++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c4f99f1..82d0709 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -175,6 +175,8 @@ enum {
  */
 #define KVM_APIC_PV_EOI_PENDING1
 
+struct kvm_kernel_irq_routing_entry;
+
 /*
  * We don't want allocation failures within the mmu code, so we preallocate
  * enough memory for a single page fault in a cache.
@@ -1212,4 +1214,6 @@ int x86_set_memory_region(struct kvm *kvm,
 bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
 struct kvm_vcpu **dest_vcpu);
 
+void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+struct kvm_lapic_irq *irq);
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index a9572a13..1319c60 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -91,8 +91,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct 
kvm_lapic *src,
return r;
 }
 
-static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
-  struct kvm_lapic_irq *irq)
+void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+struct kvm_lapic_irq *irq)
 {
trace_kvm_msi_set_irq(e-msi.address_lo, e-msi.data);
 
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v6 12/16] KVM: Add an arch specific hooks in 'struct kvm_kernel_irqfd'

2015-08-11 Thread Feng Wu
This patch adds an arch-specific hook 'arch_update' in
'struct kvm_kernel_irqfd'. On the Intel side, it is used to
update the IRTE when VT-d posted-interrupts are used.

Signed-off-by: Feng Wu <feng...@intel.com>
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/x86.c  |  5 +
 include/linux/kvm_host.h| 11 +++
 include/linux/kvm_irqfd.h   |  2 ++
 virt/kvm/eventfd.c  | 12 +++-
 5 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3038c1b..22269b4 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -176,6 +176,8 @@ enum {
  */
 #define KVM_APIC_PV_EOI_PENDING1
 
+#define __KVM_HAVE_ARCH_IRQFD_INIT 1
+
 struct kvm_kernel_irq_routing_entry;
 
 /*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8df7b0d..74a1d83 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8360,6 +8360,11 @@ void kvm_arch_irq_bypass_start(struct 
irq_bypass_consumer *cons)
 {
 }
 
+void kvm_arch_irqfd_init(struct kvm_kernel_irqfd *irqfd)
+{
+   irqfd-arch_update = kvm_arch_update_pi_irte;
+}
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5f183fb..f4005dc 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -34,6 +34,8 @@
 
 #include asm/kvm_host.h
 
+struct kvm_kernel_irqfd;
+
 /*
  * The bit 16 ~ bit 31 of kvm_memory_region::flags are internally used
  * in kvm, other bits are visible for userspace which are defined in
@@ -1145,6 +1147,15 @@ extern struct kvm_device_ops kvm_xics_ops;
 extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
 extern struct kvm_device_ops kvm_arm_vgic_v3_ops;
 
+#ifdef __KVM_HAVE_ARCH_IRQFD_INIT
+void kvm_arch_irqfd_init(struct kvm_kernel_irqfd *irqfd);
+#else
+static inline void kvm_arch_irqfd_init(struct kvm_kernel_irqfd *irqfd)
+{
+   irqfd-arch_update = NULL;
+}
+#endif
+
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 
 static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h
index 0c1de05..b7aab52 100644
--- a/include/linux/kvm_irqfd.h
+++ b/include/linux/kvm_irqfd.h
@@ -66,6 +66,8 @@ struct kvm_kernel_irqfd {
struct work_struct shutdown;
struct irq_bypass_consumer consumer;
struct irq_bypass_producer *producer;
+   int (*arch_update)(struct kvm *kvm, unsigned int host_irq,
+  uint32_t guest_irq, bool set);
 };
 
 #endif /* __LINUX_KVM_IRQFD_H */
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index d7a230f..3204283 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -278,6 +278,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
INIT_LIST_HEAD(irqfd-list);
INIT_WORK(irqfd-inject, irqfd_inject);
INIT_WORK(irqfd-shutdown, irqfd_shutdown);
+   kvm_arch_irqfd_init(irqfd);
seqcount_init(irqfd-irq_entry_sc);
 
f = fdget(args-fd);
@@ -570,13 +571,22 @@ kvm_irqfd_release(struct kvm *kvm)
  */
 void kvm_irq_routing_update(struct kvm *kvm)
 {
+   int ret;
struct kvm_kernel_irqfd *irqfd;
 
spin_lock_irq(kvm-irqfds.lock);
 
-   list_for_each_entry(irqfd, kvm-irqfds.items, list)
+   list_for_each_entry(irqfd, kvm-irqfds.items, list) {
irqfd_update(kvm, irqfd);
 
+   if (irqfd-arch_update  irqfd-producer) {
+   ret = irqfd-arch_update(
+   irqfd-kvm, irqfd-producer-irq,
+   irqfd-gsi, 1);
+   WARN_ON(ret);
+   }
+   }
+
spin_unlock_irq(kvm-irqfds.lock);
 }
 
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v6 13/16] KVM: Update Posted-Interrupts Descriptor when vCPU is preempted

2015-08-11 Thread Feng Wu
This patch updates the Posted-Interrupts Descriptor when vCPU
is preempted.

sched out:
- Set 'SN' to suppress future non-urgent interrupts posted for
the vCPU.

sched in:
- Clear 'SN'
- Change NDST if vCPU is scheduled to a different CPU
- Set 'NV' to POSTED_INTR_VECTOR

Signed-off-by: Feng Wu feng...@intel.com
---
 arch/x86/kvm/vmx.c | 34 ++
 1 file changed, 34 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 234f720..20472ad 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -45,6 +45,7 @@
 #include asm/debugreg.h
 #include asm/kexec.h
 #include asm/apic.h
+#include asm/irq_remapping.h
 
 #include trace.h
 #include pmu.h
@@ -2001,10 +2002,43 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int 
cpu)
vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
vmx-loaded_vmcs-cpu = cpu;
}
+
+   if (irq_remapping_cap(IRQ_POSTING_CAP)) {
+   struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+   struct pi_desc old, new;
+   unsigned int dest;
+
+   do {
+   old.control = new.control = pi_desc-control;
+   if (vcpu-cpu != cpu) {
+   dest = cpu_physical_id(cpu);
+
+   if (x2apic_enabled())
+   new.ndst = dest;
+   else
+   new.ndst = (dest  8)  0xFF00;
+   }
+
+   /* Allow posting non-urgent interrupts */
+   new.sn = 0;
+
+   /* set 'NV' to 'notification vector' */
+   new.nv = POSTED_INTR_VECTOR;
+   } while (cmpxchg(pi_desc-control, old.control,
+   new.control) != old.control);
+   }
 }
 
 static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
 {
+   if (irq_remapping_cap(IRQ_POSTING_CAP)) {
+   struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+   /* Set SN when the vCPU is preempted */
+   if (vcpu-preempted)
+   pi_set_sn(pi_desc);
+   }
+
__vmx_load_host_state(to_vmx(vcpu));
if (!vmm_exclusive) {
__loaded_vmcs_clear(to_vmx(vcpu)-loaded_vmcs);
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v6 11/16] KVM: Implement IRQ bypass consumer callbacks for x86

2015-08-11 Thread Feng Wu
Implement the following callbacks for x86:

- kvm_arch_irq_bypass_add_producer
- kvm_arch_irq_bypass_del_producer
- kvm_arch_irq_bypass_stop: dummy callback
- kvm_arch_irq_bypass_resume: dummy callback

and set CONFIG_HAVE_KVM_IRQ_BYPASS for x86.

Signed-off-by: Feng Wu feng...@intel.com
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/Kconfig|  1 +
 arch/x86/kvm/x86.c  | 39 +++
 3 files changed, 41 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 82d0709..3038c1b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -24,6 +24,7 @@
 #include linux/perf_event.h
 #include linux/pvclock_gtod.h
 #include linux/clocksource.h
+#include linux/irqbypass.h
 
 #include asm/pvclock-abi.h
 #include asm/desc.h
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index c951d44..b90776f 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -30,6 +30,7 @@ config KVM
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQFD
select IRQ_BYPASS_MANAGER
+   select HAVE_KVM_IRQ_BYPASS
select HAVE_KVM_IRQ_ROUTING
select HAVE_KVM_EVENTFD
select KVM_APIC_ARCHITECTURE
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8f09a76..8df7b0d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -50,6 +50,8 @@
 #include linux/pci.h
 #include linux/timekeeper_internal.h
 #include linux/pvclock_gtod.h
+#include linux/kvm_irqfd.h
+#include linux/irqbypass.h
 #include trace/events/kvm.h
 
 #define CREATE_TRACE_POINTS
@@ -8321,6 +8323,43 @@ out:
return ret;
 }
 
+int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
+ struct irq_bypass_producer *prod)
+{
+   struct kvm_kernel_irqfd *irqfd =
+   container_of(cons, struct kvm_kernel_irqfd, consumer);
+
+   irqfd-producer = prod;
+
+   return kvm_arch_update_pi_irte(irqfd-kvm, prod-irq, irqfd-gsi, 1);
+}
+
+void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
+ struct irq_bypass_producer *prod)
+{
+   int ret;
+   struct kvm_kernel_irqfd *irqfd =
+   container_of(cons, struct kvm_kernel_irqfd, consumer);
+
+   irqfd-producer = NULL;
+
+   /*
+* When producer of consumer is unregistered, we change back to
+* remapped mode, so we can re-use the current implementation
+* when the irq is masked/disabed or the consumer side (KVM
+* int this case doesn't want to receive the interrupts.
+   */
+   ret = kvm_arch_update_pi_irte(irqfd-kvm, prod-irq, irqfd-gsi, 0);
+   WARN_ON(ret);
+}
+
+void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons)
+{
+}
+void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *cons)
+{
+}
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v6 07/16] KVM: make kvm_set_msi_irq() public

2015-08-11 Thread Feng Wu
Make kvm_set_msi_irq() public so that it can be used outside irq_comm.c.

Signed-off-by: Feng Wu feng...@intel.com
---
 arch/x86/include/asm/kvm_host.h | 4 
 arch/x86/kvm/irq_comm.c | 4 ++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c4f99f1..82d0709 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -175,6 +175,8 @@ enum {
  */
 #define KVM_APIC_PV_EOI_PENDING1
 
+struct kvm_kernel_irq_routing_entry;
+
 /*
  * We don't want allocation failures within the mmu code, so we preallocate
  * enough memory for a single page fault in a cache.
@@ -1212,4 +1214,6 @@ int x86_set_memory_region(struct kvm *kvm,
 bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
 struct kvm_vcpu **dest_vcpu);
 
+void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+struct kvm_lapic_irq *irq);
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index a9572a13..1319c60 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -91,8 +91,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct 
kvm_lapic *src,
return r;
 }
 
-static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
-  struct kvm_lapic_irq *irq)
+void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+struct kvm_lapic_irq *irq)
 {
trace_kvm_msi_set_irq(e-msi.address_lo, e-msi.data);
 
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v6 09/16] vfio: Register/unregister irq_bypass_producer

2015-08-11 Thread Feng Wu
This patch adds the registration/unregistration of an
irq_bypass_producer for MSI/MSIx on vfio pci devices.

v6:
- Make the add_consumer and del_consumer callbacks static
- Remove pointless INIT_LIST_HEAD of 'vdev->ctx[vector].producer.node'
- Use dev_info instead of WARN_ON() when irq_bypass_register_producer fails
- Remove optional dummy callbacks for irq producer

Signed-off-by: Feng Wu feng...@intel.com
---
 drivers/vfio/pci/vfio_pci_intrs.c   | 9 +
 drivers/vfio/pci/vfio_pci_private.h | 2 ++
 2 files changed, 11 insertions(+)

diff --git a/drivers/vfio/pci/vfio_pci_intrs.c 
b/drivers/vfio/pci/vfio_pci_intrs.c
index 1f577b4..c65299d 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -319,6 +319,7 @@ static int vfio_msi_set_vector_signal(struct 
vfio_pci_device *vdev,
 
if (vdev-ctx[vector].trigger) {
free_irq(irq, vdev-ctx[vector].trigger);
+   irq_bypass_unregister_producer(vdev-ctx[vector].producer);
kfree(vdev-ctx[vector].name);
eventfd_ctx_put(vdev-ctx[vector].trigger);
vdev-ctx[vector].trigger = NULL;
@@ -360,6 +361,14 @@ static int vfio_msi_set_vector_signal(struct 
vfio_pci_device *vdev,
return ret;
}
 
+   vdev-ctx[vector].producer.token = trigger;
+   vdev-ctx[vector].producer.irq = irq;
+   ret = irq_bypass_register_producer(vdev-ctx[vector].producer);
+   if (unlikely(ret))
+   dev_info(pdev-dev,
+   irq bypass producer (token %p) registeration fails: %d\n,
+   vdev-ctx[vector].producer.token, ret);
+
vdev-ctx[vector].trigger = trigger;
 
return 0;
diff --git a/drivers/vfio/pci/vfio_pci_private.h 
b/drivers/vfio/pci/vfio_pci_private.h
index ae0e1b4..0e7394f 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -13,6 +13,7 @@
 
 #include linux/mutex.h
 #include linux/pci.h
+#include linux/irqbypass.h
 
 #ifndef VFIO_PCI_PRIVATE_H
 #define VFIO_PCI_PRIVATE_H
@@ -29,6 +30,7 @@ struct vfio_pci_irq_ctx {
struct virqfd   *mask;
char*name;
boolmasked;
+   struct irq_bypass_producer  producer;
 };
 
 struct vfio_pci_device {
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v6 08/16] vfio: Select IRQ_BYPASS_MANAGER for vfio PCI devices

2015-08-11 Thread Feng Wu
Enable irq bypass manager for vfio PCI devices.

Signed-off-by: Feng Wu feng...@intel.com
---
 drivers/vfio/pci/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
index 579d83b..02912f1 100644
--- a/drivers/vfio/pci/Kconfig
+++ b/drivers/vfio/pci/Kconfig
@@ -2,6 +2,7 @@ config VFIO_PCI
tristate VFIO support for PCI devices
depends on VFIO  PCI  EVENTFD
select VFIO_VIRQFD
+   select IRQ_BYPASS_MANAGER
help
  Support for the PCI VFIO bus driver.  This is required to make
  use of PCI drivers using the VFIO framework.
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v6 06/16] KVM: Make struct kvm_irq_routing_table accessible

2015-08-11 Thread Feng Wu
Move struct kvm_irq_routing_table from irqchip.c to kvm_host.h,
so we can use it outside of irqchip.c.

Signed-off-by: Feng Wu feng...@intel.com
---
 include/linux/kvm_host.h | 14 ++
 virt/kvm/irqchip.c   | 10 --
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5ac8d21..5f183fb 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -328,6 +328,20 @@ struct kvm_kernel_irq_routing_entry {
struct hlist_node link;
 };
 
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+
+struct kvm_irq_routing_table {
+   int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
+   u32 nr_rt_entries;
+   /*
+* Array indexed by gsi. Each entry contains list of irq chips
+* the gsi is connected to.
+*/
+   struct hlist_head map[0];
+};
+
+#endif
+
 #ifndef KVM_PRIVATE_MEM_SLOTS
 #define KVM_PRIVATE_MEM_SLOTS 0
 #endif
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 21c1424..2cf45d3 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -31,16 +31,6 @@
 #include trace/events/kvm.h
 #include irq.h
 
-struct kvm_irq_routing_table {
-   int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
-   u32 nr_rt_entries;
-   /*
-* Array indexed by gsi. Each entry contains list of irq chips
-* the gsi is connected to.
-*/
-   struct hlist_head map[0];
-};
-
 int kvm_irq_map_gsi(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *entries, int gsi)
 {
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v6 10/16] KVM: x86: Update IRTE for posted-interrupts

2015-08-11 Thread Feng Wu
This patch adds the routine to update IRTE for posted-interrupts
when guest changes the interrupt configuration.

Signed-off-by: Feng Wu feng...@intel.com
---
 arch/x86/kvm/x86.c | 73 ++
 1 file changed, 73 insertions(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5ef2560..8f09a76 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -63,6 +63,7 @@
 #include asm/fpu/internal.h /* Ugh! */
 #include asm/pvclock.h
 #include asm/div64.h
+#include asm/irq_remapping.h
 
 #define MAX_IO_MSRS 256
 #define KVM_MAX_MCE_BANKS 32
@@ -8248,6 +8249,78 @@ bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
 }
 EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
 
+/*
+ * kvm_arch_update_pi_irte - set IRTE for Posted-Interrupts
+ *
+ * @kvm: kvm
+ * @host_irq: host irq of the interrupt
+ * @guest_irq: gsi of the interrupt
+ * @set: set or unset PI
+ * returns 0 on success,  0 on failure
+ */
+int kvm_arch_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
+   uint32_t guest_irq, bool set)
+{
+   struct kvm_kernel_irq_routing_entry *e;
+   struct kvm_irq_routing_table *irq_rt;
+   struct kvm_lapic_irq irq;
+   struct kvm_vcpu *vcpu;
+   struct vcpu_data vcpu_info;
+   int idx, ret = -EINVAL;
+
+   if (!irq_remapping_cap(IRQ_POSTING_CAP))
+   return 0;
+
+   idx = srcu_read_lock(kvm-irq_srcu);
+   irq_rt = srcu_dereference(kvm-irq_routing, kvm-irq_srcu);
+   BUG_ON(guest_irq = irq_rt-nr_rt_entries);
+
+   hlist_for_each_entry(e, irq_rt-map[guest_irq], link) {
+   if (e-type != KVM_IRQ_ROUTING_MSI)
+   continue;
+   /*
+* VT-d PI cannot support posting multicast/broadcast
+* interrupts to a VCPU, we still use interrupt remapping
+* for these kind of interrupts.
+*
+* For lowest-priority interrupts, we only support
+* those with single CPU as the destination, e.g. user
+* configures the interrupts via /proc/irq or uses
+* irqbalance to make the interrupts single-CPU.
+*
+* We will support full lowest-priority interrupt later.
+*
+*/
+
+   kvm_set_msi_irq(e, irq);
+   if (!kvm_intr_is_single_vcpu(kvm, irq, vcpu))
+   continue;
+
+   vcpu_info.pi_desc_addr = kvm_x86_ops-get_pi_desc_addr(vcpu);
+   vcpu_info.vector = irq.vector;
+
+   if (set)
+   ret = irq_set_vcpu_affinity(host_irq, vcpu_info);
+   else {
+   /* suppress notification event before unposting */
+   kvm_x86_ops-pi_set_sn(vcpu);
+   ret = irq_set_vcpu_affinity(host_irq, NULL);
+   kvm_x86_ops-pi_clear_sn(vcpu);
+   }
+
+   if (ret  0) {
+   printk(KERN_INFO %s: failed to update PI IRTE\n,
+   __func__);
+   goto out;
+   }
+   }
+
+   ret = 0;
+out:
+   srcu_read_unlock(kvm-irq_srcu, idx);
+   return ret;
+}
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v6 16/16] iommu/vt-d: Add a command line parameter for VT-d posted-interrupts

2015-08-11 Thread Feng Wu
Enable VT-d Posted-Interrupts and add a command line
parameter for it.

Signed-off-by: Feng Wu feng...@intel.com
---
 Documentation/kernel-parameters.txt |  1 +
 drivers/iommu/irq_remapping.c   | 12 
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/Documentation/kernel-parameters.txt 
b/Documentation/kernel-parameters.txt
index 1d6f045..52aca36 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1547,6 +1547,7 @@ bytes respectively. Such letter suffixes can also be 
entirely omitted.
nosid   disable Source ID checking
no_x2apic_optout
BIOS x2APIC opt-out request will be ignored
+   nopost  disable Interrupt Posting
 
iomem=  Disable strict checking of access to MMIO memory
strict  regions from userspace.
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 2d99930..d8c3997 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -22,7 +22,7 @@ int irq_remap_broken;
 int disable_sourceid_checking;
 int no_x2apic_optout;
 
-int disable_irq_post = 1;
+int disable_irq_post = 0;
 
 static int disable_irq_remap;
 static struct irq_remap_ops *remap_ops;
@@ -58,14 +58,18 @@ static __init int setup_irqremap(char *str)
return -EINVAL;
 
while (*str) {
-   if (!strncmp(str, on, 2))
+   if (!strncmp(str, on, 2)) {
disable_irq_remap = 0;
-   else if (!strncmp(str, off, 3))
+   disable_irq_post = 0;
+   } else if (!strncmp(str, off, 3)) {
disable_irq_remap = 1;
-   else if (!strncmp(str, nosid, 5))
+   disable_irq_post = 1;
+   } else if (!strncmp(str, nosid, 5))
disable_sourceid_checking = 1;
else if (!strncmp(str, no_x2apic_optout, 16))
no_x2apic_optout = 1;
+   else if (!strncmp(str, nopost, 6))
+   disable_irq_post = 1;
 
str += strcspn(str, ,);
while (*str == ',')
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v6 14/16] KVM: Update Posted-Interrupts Descriptor when vCPU is blocked

2015-08-11 Thread Feng Wu
This patch updates the Posted-Interrupts Descriptor when vCPU
is blocked.

pre-block:
- Add the vCPU to the blocked per-CPU list
- Set 'NV' to POSTED_INTR_WAKEUP_VECTOR

post-block:
- Remove the vCPU from the per-CPU list

Signed-off-by: Feng Wu feng...@intel.com
---
 arch/x86/include/asm/kvm_host.h |   3 +
 arch/x86/kvm/vmx.c  | 158 
 arch/x86/kvm/x86.c  |  42 ---
 include/linux/kvm_host.h|   3 +
 virt/kvm/kvm_main.c |   3 +
 5 files changed, 199 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 22269b4..6bc11fb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -868,6 +868,9 @@ struct kvm_x86_ops {
 
void (*pi_clear_sn)(struct kvm_vcpu *vcpu);
void (*pi_set_sn)(struct kvm_vcpu *vcpu);
+
+   int (*pi_pre_block)(struct kvm_vcpu *vcpu);
+   void (*pi_post_block)(struct kvm_vcpu *vcpu);
/* pmu operations of sub-arch */
const struct kvm_pmu_ops *pmu_ops;
 };
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 20472ad..6b50eba 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -888,6 +888,13 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
 static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
 static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
 
+/*
+ * We maintian a per-CPU linked-list of vCPU, so in wakeup_handler() we
+ * can find which vCPU should be waken up.
+ */
+static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
+static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
+
 static unsigned long *vmx_io_bitmap_a;
 static unsigned long *vmx_io_bitmap_b;
 static unsigned long *vmx_msr_bitmap_legacy;
@@ -2964,6 +2971,8 @@ static int hardware_enable(void)
return -EBUSY;
 
INIT_LIST_HEAD(per_cpu(loaded_vmcss_on_cpu, cpu));
+   INIT_LIST_HEAD(per_cpu(blocked_vcpu_on_cpu, cpu));
+   spin_lock_init(per_cpu(blocked_vcpu_on_cpu_lock, cpu));
 
/*
 * Now we can enable the vmclear operation in kdump
@@ -6089,6 +6098,25 @@ static void update_ple_window_actual_max(void)
ple_window_grow, INT_MIN);
 }
 
+/*
+ * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
+ */
+static void wakeup_handler(void)
+{
+   struct kvm_vcpu *vcpu;
+   int cpu = smp_processor_id();
+
+   spin_lock(per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+   list_for_each_entry(vcpu, per_cpu(blocked_vcpu_on_cpu, cpu),
+   blocked_vcpu_list) {
+   struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+   if (pi_test_on(pi_desc) == 1)
+   kvm_vcpu_kick(vcpu);
+   }
+   spin_unlock(per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+}
+
 static __init int hardware_setup(void)
 {
int r = -ENOMEM, i, msr;
@@ -6273,6 +6301,8 @@ static __init int hardware_setup(void)
kvm_x86_ops-enable_log_dirty_pt_masked = NULL;
}
 
+   kvm_set_posted_intr_wakeup_handler(wakeup_handler);
+
return alloc_kvm_area();
 
 out8:
@@ -10397,6 +10427,131 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm 
*kvm,
kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
 }
 
+/*
+ * This routine does the following things for vCPU which is going
+ * to be blocked if VT-d PI is enabled.
+ * - Store the vCPU to the wakeup list, so when interrupts happen
+ *   we can find the right vCPU to wake up.
+ * - Change the Posted-interrupt descriptor as below:
+ *  'NDST' -- vcpu-pre_pcpu
+ *  'NV' -- POSTED_INTR_WAKEUP_VECTOR
+ * - If 'ON' is set during this process, which means at least one
+ *   interrupt is posted for this vCPU, we cannot block it, in
+ *   this case, return 1, otherwise, return 0.
+ *
+ */
+static int vmx_pi_pre_block(struct kvm_vcpu *vcpu)
+{
+   unsigned long flags;
+   unsigned int dest;
+   struct pi_desc old, new;
+   struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+   if (!irq_remapping_cap(IRQ_POSTING_CAP))
+   return 0;
+
+   vcpu-pre_pcpu = vcpu-cpu;
+   spin_lock_irqsave(per_cpu(blocked_vcpu_on_cpu_lock,
+ vcpu-pre_pcpu), flags);
+   list_add_tail(vcpu-blocked_vcpu_list,
+ per_cpu(blocked_vcpu_on_cpu,
+ vcpu-pre_pcpu));
+   spin_unlock_irqrestore(per_cpu(blocked_vcpu_on_cpu_lock,
+  vcpu-pre_pcpu), flags);
+
+   do {
+   old.control = new.control = pi_desc-control;
+
+   /*
+* We should not block the vCPU if
+* an interrupt is posted for it.
+*/
+   if (pi_test_on(pi_desc) == 1) {
+   spin_lock_irqsave(per_cpu(blocked_vcpu_on_cpu_lock,
+ vcpu-pre_pcpu), flags

[PATCH v6 15/16] KVM: Warn if 'SN' is set during posting interrupts by software

2015-08-11 Thread Feng Wu
Currently, we don't support urgent interrupts; all interrupts
are treated as non-urgent, so we cannot post interrupts
when 'SN' is set.

If the vcpu is in guest mode, it cannot have been scheduled out,
and that is currently the only case in which SN is set, so warn
if SN is set.

Signed-off-by: Feng Wu feng...@intel.com
---
 arch/x86/kvm/vmx.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6b50eba..b4f5600 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4477,6 +4477,22 @@ static inline bool 
kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_SMP
if (vcpu-mode == IN_GUEST_MODE) {
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+   /*
+* Currently, we don't support urgent interrupt,
+* all interrupts are recognized as non-urgent
+* interrupt, so we cannot post interrupts when
+* 'SN' is set.
+*
+* If the vcpu is in guest mode, it means it is
+* running instead of being scheduled out and
+* waiting in the run queue, and that's the only
+* case when 'SN' is set currently, warning if
+* 'SN' is set.
+*/
+   WARN_ON_ONCE(pi_test_sn(vmx-pi_desc));
+
apic-send_IPI_mask(get_cpu_mask(vcpu-cpu),
POSTED_INTR_VECTOR);
return true;
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v6 01/16] KVM: Extend struct pi_desc for VT-d Posted-Interrupts

2015-08-11 Thread Feng Wu
Extend struct pi_desc for VT-d Posted-Interrupts.

Signed-off-by: Feng Wu feng...@intel.com
---
 arch/x86/kvm/vmx.c | 20 ++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 83b7b5c..271dd70 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -446,8 +446,24 @@ struct nested_vmx {
 /* Posted-Interrupt Descriptor */
 struct pi_desc {
u32 pir[8]; /* Posted interrupt requested */
-   u32 control;/* bit 0 of control is outstanding notification bit */
-   u32 rsvd[7];
+   union {
+   struct {
+   /* bit 256 - Outstanding Notification */
+   u16 on  : 1,
+   /* bit 257 - Suppress Notification */
+   sn  : 1,
+   /* bit 271:258 - Reserved */
+   rsvd_1  : 14;
+   /* bit 279:272 - Notification Vector */
+   u8  nv;
+   /* bit 287:280 - Reserved */
+   u8  rsvd_2;
+   /* bit 319:288 - Notification Destination */
+   u32 ndst;
+   };
+   u64 control;
+   };
+   u32 rsvd[6];
 } __aligned(64);
 
 static bool pi_test_and_set_on(struct pi_desc *pi_desc)
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v6 00/16] Add VT-d Posted-Interrupts support

2015-08-11 Thread Feng Wu
VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
intervention when guest is running in non-root mode.

You can find the VT-d Posted-Interrupts Spec. at the following URL:
http://www.intel.com/content/www/us/en/intelligent-systems/intel-technology/vt-directed-io-spec.html

v6:
* Rebase on 4.2.0-rc6
* Rebase on https://lkml.org/lkml/2015/8/6/526 and 
http://www.gossamer-threads.com/lists/linux/kernel/2235623
* Make the add_consumer and del_consumer callbacks static
* Remove pointless INIT_LIST_HEAD of 'vdev->ctx[vector].producer.node'
* Use dev_info instead of WARN_ON() when irq_bypass_register_producer fails
* Remove optional dummy callbacks for irq producer

v4:
* For lowest-priority interrupt, only support single-CPU destination
interrupts at the current stage, more common lowest priority support
will be added later.
* According to Marcelo's suggestion, when the vCPU is blocked, we handle
the posted-interrupts in the HLT emulation path.
* Some small changes (coding style, typo, add some code comments)

v3:
* Adjust the Posted-interrupts Descriptor updating logic when vCPU is
  preempted or blocked.
* KVM_DEV_VFIO_DEVICE_POSTING_IRQ --> KVM_DEV_VFIO_DEVICE_POST_IRQ
* __KVM_HAVE_ARCH_KVM_VFIO_POSTING --> __KVM_HAVE_ARCH_KVM_VFIO_POST
* Add KVM_DEV_VFIO_DEVICE_UNPOST_IRQ attribute for VFIO irq, which
  can be used to change back to remapping mode.
* Fix typo

v2:
* Use the VFIO framework to enable this feature; the VFIO part of this series is
  based on Eric's patch series "[PATCH v3 0/8] KVM-VFIO IRQ forward control"
* Rebase this patchset on 
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git,
  then revise some irq logic based on the new hierarchy irqdomain patches 
provided
  by Jiang Liu jiang@linux.intel.com

Feng Wu (16):
  KVM: Extend struct pi_desc for VT-d Posted-Interrupts
  KVM: Add some helper functions for Posted-Interrupts
  KVM: Define a new interface kvm_intr_is_single_vcpu()
  KVM: Get Posted-Interrupts descriptor address from 'struct kvm_vcpu'
  KVM: Add interfaces to control PI outside vmx
  KVM: Make struct kvm_irq_routing_table accessible
  KVM: make kvm_set_msi_irq() public
  vfio: Select IRQ_BYPASS_MANAGER for vfio PCI devices
  vfio: Register/unregister irq_bypass_producer
  KVM: x86: Update IRTE for posted-interrupts
  KVM: Implement IRQ bypass consumer callbacks for x86
  KVM: Add an arch specific hooks in 'struct kvm_kernel_irqfd'
  KVM: Update Posted-Interrupts Descriptor when vCPU is preempted
  KVM: Update Posted-Interrupts Descriptor when vCPU is blocked
  KVM: Warn if 'SN' is set during posting interrupts by software
  iommu/vt-d: Add a command line parameter for VT-d posted-interrupts

 Documentation/kernel-parameters.txt |   1 +
 arch/x86/include/asm/kvm_host.h |  18 +++
 arch/x86/kvm/Kconfig|   1 +
 arch/x86/kvm/irq_comm.c |  28 +++-
 arch/x86/kvm/vmx.c  | 278 +++-
 arch/x86/kvm/x86.c  | 159 +++--
 drivers/iommu/irq_remapping.c   |  12 +-
 drivers/vfio/pci/Kconfig|   1 +
 drivers/vfio/pci/vfio_pci_intrs.c   |   9 ++
 drivers/vfio/pci/vfio_pci_private.h |   2 +
 include/linux/kvm_host.h|  28 
 include/linux/kvm_irqfd.h   |   2 +
 virt/kvm/eventfd.c  |  12 +-
 virt/kvm/irqchip.c  |  10 --
 virt/kvm/kvm_main.c |   3 +
 15 files changed, 535 insertions(+), 29 deletions(-)

-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v6 02/16] KVM: Add some helper functions for Posted-Interrupts

2015-08-11 Thread Feng Wu
This patch adds some helper functions to manipulate the
Posted-Interrupts Descriptor.

Signed-off-by: Feng Wu feng...@intel.com
---
 arch/x86/kvm/vmx.c | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 271dd70..316f9bf 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -443,6 +443,8 @@ struct nested_vmx {
 };
 
 #define POSTED_INTR_ON  0
+#define POSTED_INTR_SN  1
+
 /* Posted-Interrupt Descriptor */
 struct pi_desc {
u32 pir[8]; /* Posted interrupt requested */
@@ -483,6 +485,30 @@ static int pi_test_and_set_pir(int vector, struct pi_desc 
*pi_desc)
return test_and_set_bit(vector, (unsigned long *)pi_desc-pir);
 }
 
+static void pi_clear_sn(struct pi_desc *pi_desc)
+{
+   return clear_bit(POSTED_INTR_SN,
+   (unsigned long *)pi_desc-control);
+}
+
+static void pi_set_sn(struct pi_desc *pi_desc)
+{
+   return set_bit(POSTED_INTR_SN,
+   (unsigned long *)pi_desc-control);
+}
+
+static int pi_test_on(struct pi_desc *pi_desc)
+{
+   return test_bit(POSTED_INTR_ON,
+   (unsigned long *)pi_desc-control);
+}
+
+static int pi_test_sn(struct pi_desc *pi_desc)
+{
+   return test_bit(POSTED_INTR_SN,
+   (unsigned long *)pi_desc-control);
+}
+
 struct vcpu_vmx {
struct kvm_vcpu   vcpu;
unsigned long host_rsp;
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v6 05/16] KVM: Add interfaces to control PI outside vmx

2015-08-11 Thread Feng Wu
This patch adds pi_clear_sn and pi_set_sn to struct kvm_x86_ops,
so we can set/clear SN outside vmx.

Signed-off-by: Feng Wu feng...@intel.com
---
 arch/x86/include/asm/kvm_host.h |  3 +++
 arch/x86/kvm/vmx.c  | 13 +
 2 files changed, 16 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d50c1d3..c4f99f1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -860,6 +860,9 @@ struct kvm_x86_ops {
   gfn_t offset, unsigned long mask);
 
u64 (*get_pi_desc_addr)(struct kvm_vcpu *vcpu);
+
+   void (*pi_clear_sn)(struct kvm_vcpu *vcpu);
+   void (*pi_set_sn)(struct kvm_vcpu *vcpu);
/* pmu operations of sub-arch */
const struct kvm_pmu_ops *pmu_ops;
 };
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 81a995c..234f720 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -615,6 +615,16 @@ struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
return (to_vmx(vcpu)-pi_desc);
 }
 
+static void vmx_pi_clear_sn(struct kvm_vcpu *vcpu)
+{
+   pi_clear_sn(vcpu_to_pi_desc(vcpu));
+}
+
+static void vmx_pi_set_sn(struct kvm_vcpu *vcpu)
+{
+   pi_set_sn(vcpu_to_pi_desc(vcpu));
+}
+
 static unsigned long shadow_read_only_fields[] = {
/*
 * We do NOT shadow fields that are modified when L0
@@ -10471,6 +10481,9 @@ static struct kvm_x86_ops vmx_x86_ops = {
 
.get_pi_desc_addr = vmx_get_pi_desc_addr,
 
+   .pi_clear_sn = vmx_pi_clear_sn,
+   .pi_set_sn = vmx_pi_set_sn,
+
.pmu_ops = intel_pmu_ops,
 };
 
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v6 03/16] KVM: Define a new interface kvm_intr_is_single_vcpu()

2015-08-11 Thread Feng Wu
This patch defines a new interface, kvm_intr_is_single_vcpu(),
which returns whether the interrupt is for a single CPU or not.

It is used by VT-d PI, since for now we only support single-CPU
interrupts. For lowest-priority interrupts, if the user configures
them via /proc/irq or uses irqbalance to make them single-CPU, we
can use PI to deliver the interrupts. Full functionality
of lowest-priority support will be added later.

Signed-off-by: Feng Wu feng...@intel.com
---
 arch/x86/include/asm/kvm_host.h |  3 +++
 arch/x86/kvm/irq_comm.c | 24 
 2 files changed, 27 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 49ec903..af11bca 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1204,4 +1204,7 @@ int __x86_set_memory_region(struct kvm *kvm,
 int x86_set_memory_region(struct kvm *kvm,
  const struct kvm_userspace_memory_region *mem);
 
+bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
+struct kvm_vcpu **dest_vcpu);
+
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 9efff9e..a9572a13 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -297,6 +297,30 @@ out:
return r;
 }
 
+bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
+struct kvm_vcpu **dest_vcpu)
+{
+   int i, r = 0;
+   struct kvm_vcpu *vcpu;
+
+   kvm_for_each_vcpu(i, vcpu, kvm) {
+   if (!kvm_apic_present(vcpu))
+   continue;
+
+   if (!kvm_apic_match_dest(vcpu, NULL, irq-shorthand,
+   irq-dest_id, irq-dest_mode))
+   continue;
+
+   r++;
+   *dest_vcpu = vcpu;
+   }
+
+   if (r == 1)
+   return true;
+   else
+   return false;
+}
+
 #define IOAPIC_ROUTING_ENTRY(irq) \
{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,  \
  .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } }
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v6 04/16] KVM: Get Posted-Interrupts descriptor address from 'struct kvm_vcpu'

2015-08-11 Thread Feng Wu
Define an interface to get PI descriptor address from the vCPU structure.

Signed-off-by: Feng Wu feng...@intel.com
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/vmx.c  | 11 +++
 2 files changed, 13 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index af11bca..d50c1d3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -858,6 +858,8 @@ struct kvm_x86_ops {
void (*enable_log_dirty_pt_masked)(struct kvm *kvm,
   struct kvm_memory_slot *slot,
   gfn_t offset, unsigned long mask);
+
+   u64 (*get_pi_desc_addr)(struct kvm_vcpu *vcpu);
/* pmu operations of sub-arch */
const struct kvm_pmu_ops *pmu_ops;
 };
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 316f9bf..81a995c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -610,6 +610,10 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu 
*vcpu)
 #define FIELD64(number, name)  [number] = VMCS12_OFFSET(name), \
[number##_HIGH] = VMCS12_OFFSET(name)+4
 
+struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
+{
+   return (to_vmx(vcpu)-pi_desc);
+}
 
 static unsigned long shadow_read_only_fields[] = {
/*
@@ -4487,6 +4491,11 @@ static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu 
*vcpu)
return;
 }
 
+static u64 vmx_get_pi_desc_addr(struct kvm_vcpu *vcpu)
+{
+   return __pa((u64)vcpu_to_pi_desc(vcpu));
+}
+
 /*
  * Set up the vmcs's constant host-state fields, i.e., host-state fields that
  * will not change in the lifetime of the guest.
@@ -10460,6 +10469,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
.flush_log_dirty = vmx_flush_log_dirty,
.enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
 
+   .get_pi_desc_addr = vmx_get_pi_desc_addr,
+
.pmu_ops = intel_pmu_ops,
 };
 
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v3 4/5] KVM: introduce kvm_arch functions for IRQ bypass

2015-07-29 Thread Feng Wu
From: Eric Auger eric.au...@linaro.org

This patch introduces
- kvm_arch_irq_bypass_add_producer
- kvm_arch_irq_bypass_del_producer
- kvm_arch_irq_bypass_stop
- kvm_arch_irq_bypass_start

They make it possible to specialize the KVM IRQ bypass consumer in
case CONFIG_HAVE_KVM_IRQ_BYPASS is set.

v2 -> v3:
- use 'kvm_arch_irq_bypass_start' instead of 'kvm_arch_irq_bypass_resume'
- Remove 'kvm_arch_irq_bypass_update', which does not need to be
  an irqbypass callback per Alex's comments.
- Make kvm_arch_irq_bypass_add_producer return 'int'

v1 -> v2:
- use CONFIG_KVM_HAVE_IRQ_BYPASS instead of CONFIG_IRQ_BYPASS_MANAGER
- rename all functions according to Paolo's proposal
- add kvm_arch_irq_bypass_update according to Feng's need

Signed-off-by: Eric Auger eric.au...@linaro.org
Signed-off-by: Feng Wu feng...@intel.com
---
 include/linux/kvm_host.h | 33 +
 virt/kvm/Kconfig |  3 +++
 2 files changed, 36 insertions(+)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 05e99b8..84b5feb 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -24,6 +24,7 @@
 #include linux/err.h
 #include linux/irqflags.h
 #include linux/context_tracking.h
+#include linux/irqbypass.h
 #include asm/signal.h
 
 #include linux/kvm.h
@@ -1151,5 +1152,37 @@ static inline void kvm_vcpu_set_dy_eligible(struct 
kvm_vcpu *vcpu, bool val)
 {
 }
 #endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
+
+#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+
+int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *,
+  struct irq_bypass_producer *);
+void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *,
+  struct irq_bypass_producer *);
+void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *);
+void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *);
+
+#else
+
+static inline int kvm_arch_irq_bypass_add_producer(
+   struct irq_bypass_consumer *cons,
+   struct irq_bypass_producer *prod)
+{
+   return -1;
+}
+static inline void kvm_arch_irq_bypass_del_producer(
+   struct irq_bypass_consumer *cons,
+   struct irq_bypass_producer *prod)
+{
+}
+static inline void kvm_arch_irq_bypass_stop(
+   struct irq_bypass_consumer *cons)
+{
+}
+static inline void kvm_arch_irq_bypass_start(
+   struct irq_bypass_consumer *cons)
+{
+}
+#endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */
 #endif
 
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index e2c876d..9f8014d 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -47,3 +47,6 @@ config KVM_GENERIC_DIRTYLOG_READ_PROTECT
 config KVM_COMPAT
def_bool y
depends on COMPAT && !S390
+
+config HAVE_KVM_IRQ_BYPASS
+   bool
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v3 1/5] KVM: x86: select IRQ_BYPASS_MANAGER

2015-07-29 Thread Feng Wu
Select IRQ_BYPASS_MANAGER for x86 when CONFIG_KVM is set

Signed-off-by: Feng Wu feng...@intel.com
---
 arch/x86/kvm/Kconfig  | 2 ++
 arch/x86/kvm/Makefile | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index d8a1d56..c951d44 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -3,6 +3,7 @@
 #
 
 source virt/kvm/Kconfig
+source virt/lib/Kconfig
 
 menuconfig VIRTUALIZATION
bool Virtualization
@@ -28,6 +29,7 @@ config KVM
select ANON_INODES
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQFD
+   select IRQ_BYPASS_MANAGER
select HAVE_KVM_IRQ_ROUTING
select HAVE_KVM_EVENTFD
select KVM_APIC_ARCHITECTURE
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 67d215c..05cc2d7 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -6,6 +6,9 @@ CFLAGS_svm.o := -I.
 CFLAGS_vmx.o := -I.
 
 KVM := ../../../virt/kvm
+LIB := ../../../virt/lib
+
+obj-$(CONFIG_IRQ_BYPASS_MANAGER)   += $(LIB)/
 
 kvm-y  += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
$(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v3 3/5] KVM: create kvm_irqfd.h

2015-07-29 Thread Feng Wu
From: Eric Auger eric.au...@linaro.org

Move the _irqfd_resampler and _irqfd struct declarations into a new
public header: kvm_irqfd.h. They are renamed to
kvm_kernel_irqfd_resampler and kvm_kernel_irqfd, respectively. Those
datatypes will be used by architecture-specific code, in the context of
IRQ bypass manager integration.

Signed-off-by: Eric Auger eric.au...@linaro.org
---
 include/linux/kvm_irqfd.h | 69 ++
 virt/kvm/eventfd.c| 95 ---
 2 files changed, 92 insertions(+), 72 deletions(-)
 create mode 100644 include/linux/kvm_irqfd.h

diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h
new file mode 100644
index 000..f926b39
--- /dev/null
+++ b/include/linux/kvm_irqfd.h
@@ -0,0 +1,69 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * irqfd: Allows an fd to be used to inject an interrupt to the guest
+ * Credit goes to Avi Kivity for the original idea.
+ */
+
+#ifndef __LINUX_KVM_IRQFD_H
+#define __LINUX_KVM_IRQFD_H
+
+#include linux/kvm_host.h
+#include linux/poll.h
+
+/*
+ * Resampling irqfds are a special variety of irqfds used to emulate
+ * level triggered interrupts.  The interrupt is asserted on eventfd
+ * trigger.  On acknowledgment through the irq ack notifier, the
+ * interrupt is de-asserted and userspace is notified through the
+ * resamplefd.  All resamplers on the same gsi are de-asserted
+ * together, so we don't need to track the state of each individual
+ * user.  We can also therefore share the same irq source ID.
+ */
+struct kvm_kernel_irqfd_resampler {
+   struct kvm *kvm;
+   /*
+* List of resampling struct _irqfd objects sharing this gsi.
+* RCU list modified under kvm-irqfds.resampler_lock
+*/
+   struct list_head list;
+   struct kvm_irq_ack_notifier notifier;
+   /*
+* Entry in list of kvm-irqfd.resampler_list.  Use for sharing
+* resamplers among irqfds on the same gsi.
+* Accessed and modified under kvm-irqfds.resampler_lock
+*/
+   struct list_head link;
+};
+
+struct kvm_kernel_irqfd {
+   /* Used for MSI fast-path */
+   struct kvm *kvm;
+   wait_queue_t wait;
+   /* Update side is protected by irqfds.lock */
+   struct kvm_kernel_irq_routing_entry irq_entry;
+   seqcount_t irq_entry_sc;
+   /* Used for level IRQ fast-path */
+   int gsi;
+   struct work_struct inject;
+   /* The resampler used by this irqfd (resampler-only) */
+   struct kvm_kernel_irqfd_resampler *resampler;
+   /* Eventfd notified on resample (resampler-only) */
+   struct eventfd_ctx *resamplefd;
+   /* Entry in list of irqfds for a resampler (resampler-only) */
+   struct list_head resampler_link;
+   /* Used for setup/shutdown */
+   struct eventfd_ctx *eventfd;
+   struct list_head list;
+   poll_table pt;
+   struct work_struct shutdown;
+};
+
+#endif /* __LINUX_KVM_IRQFD_H */
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 9ff4193..647ffb8 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -23,6 +23,7 @@
 
 #include linux/kvm_host.h
 #include linux/kvm.h
+#include linux/kvm_irqfd.h
 #include linux/workqueue.h
 #include linux/syscalls.h
 #include linux/wait.h
@@ -39,68 +40,14 @@
 #include kvm/iodev.h
 
 #ifdef CONFIG_HAVE_KVM_IRQFD
-/*
- * 
- * irqfd: Allows an fd to be used to inject an interrupt to the guest
- *
- * Credit goes to Avi Kivity for the original idea.
- * 
- */
-
-/*
- * Resampling irqfds are a special variety of irqfds used to emulate
- * level triggered interrupts.  The interrupt is asserted on eventfd
- * trigger.  On acknowledgement through the irq ack notifier, the
- * interrupt is de-asserted and userspace is notified through the
- * resamplefd.  All resamplers on the same gsi are de-asserted
- * together, so we don't need to track the state of each individual
- * user.  We can also therefore share the same irq source ID.
- */
-struct _irqfd_resampler {
-   struct kvm *kvm;
-   /*
-* List of resampling struct _irqfd objects sharing this gsi.
-* RCU list modified under kvm-irqfds.resampler_lock
-*/
-   struct list_head list;
-   struct kvm_irq_ack_notifier notifier;
-   /*
-* Entry in list of kvm-irqfd.resampler_list.  Use for sharing
-* resamplers among irqfds on the same gsi.
-* 

[v3 5/5] KVM: eventfd: add irq bypass consumer management

2015-07-29 Thread Feng Wu
From: Eric Auger eric.au...@linaro.org

This patch adds the registration/unregistration of an
irq_bypass_consumer on irqfd assignment/deassignment.

v2 -> v3:
- Use kvm_arch_irq_bypass_start
- Remove kvm_arch_irq_bypass_update
- Add member 'struct irq_bypass_producer *producer' in
  'struct kvm_kernel_irqfd'; it is needed by posted interrupts.
- Remove 'irq_bypass_unregister_consumer' in kvm_irqfd_deassign()

v1 -> v2:
- populate of kvm and gsi removed
- unregister the consumer on irqfd_shutdown

Signed-off-by: Eric Auger eric.au...@linaro.org
Signed-off-by: Feng Wu feng...@intel.com
---
 include/linux/kvm_irqfd.h |  2 ++
 virt/kvm/eventfd.c| 10 ++
 2 files changed, 12 insertions(+)

diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h
index f926b39..0c1de05 100644
--- a/include/linux/kvm_irqfd.h
+++ b/include/linux/kvm_irqfd.h
@@ -64,6 +64,8 @@ struct kvm_kernel_irqfd {
struct list_head list;
poll_table pt;
struct work_struct shutdown;
+   struct irq_bypass_consumer consumer;
+   struct irq_bypass_producer *producer;
 };
 
 #endif /* __LINUX_KVM_IRQFD_H */
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 647ffb8..08855de 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -35,6 +35,7 @@
 #include linux/srcu.h
 #include linux/slab.h
 #include linux/seqlock.h
+#include linux/irqbypass.h
 #include trace/events/kvm.h
 
 #include kvm/iodev.h
@@ -140,6 +141,7 @@ irqfd_shutdown(struct work_struct *work)
/*
 * It is now safe to release the object's resources
 */
+   irq_bypass_unregister_consumer(irqfd-consumer);
eventfd_ctx_put(irqfd-eventfd);
kfree(irqfd);
 }
@@ -380,6 +382,14 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 */
fdput(f);
 
+   irqfd-consumer.token = (void *)irqfd-eventfd;
+   irqfd-consumer.add_producer = kvm_arch_irq_bypass_add_producer;
+   irqfd-consumer.del_producer = kvm_arch_irq_bypass_del_producer;
+   irqfd-consumer.stop = kvm_arch_irq_bypass_stop;
+   irqfd-consumer.start = kvm_arch_irq_bypass_start;
+   ret = irq_bypass_register_consumer(irqfd-consumer);
+   WARN_ON(ret);
+
return 0;
 
 fail:
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v3 0/5] KVM: irqfd consumer based on IRQ bypass manager

2015-07-29 Thread Feng Wu
This series is based on Eric's "https://lkml.org/lkml/2015/7/6/291".
Basically, I made the following changes:
- Some changes based on Alex's irq bypass manager
  v2: https://lkml.org/lkml/2015/7/16/810
- Correct some minor errors and typos
- Add something needed for posted-interrupts

Since this series contains the common part of posted-interrupts and
forwarded irq, we can only move forward once this series is finalized;
it is on the critical path. Eric seems to be on vacation these days, so
to speed up the process I am sending out this new version. Eric, I hope
you don't mind. :) If I made any mistakes, please correct me.
Thank you!

My new version of the posted-interrupts patches works well on top of
this series.

Eric Auger (4):
  KVM: arm/arm64: select IRQ_BYPASS_MANAGER
  KVM: create kvm_irqfd.h
  KVM: introduce kvm_arch functions for IRQ bypass
  KVM: eventfd: add irq bypass consumer management

Feng Wu (1):
  KVM: x86: select IRQ_BYPASS_MANAGER

 arch/arm/kvm/Kconfig  |   1 +
 arch/arm64/kvm/Kconfig|   1 +
 arch/x86/kvm/Kconfig  |   2 +
 arch/x86/kvm/Makefile |   3 ++
 include/linux/kvm_host.h  |  33 +++
 include/linux/kvm_irqfd.h |  71 +++
 virt/kvm/Kconfig  |   3 ++
 virt/kvm/eventfd.c| 105 +++---
 8 files changed, 147 insertions(+), 72 deletions(-)
 create mode 100644 include/linux/kvm_irqfd.h

-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v3 2/5] KVM: arm/arm64: select IRQ_BYPASS_MANAGER

2015-07-29 Thread Feng Wu
From: Eric Auger eric.au...@linaro.org

Select IRQ_BYPASS_MANAGER when CONFIG_KVM is set

v2 -> v3:
- Correct a typo in 'arch/arm64/kvm/Kconfig'

v1 -> v2:
- also set IRQ_BYPASS_MANAGER for arm64

Signed-off-by: Eric Auger eric.au...@linaro.org
Signed-off-by: Feng Wu feng...@intel.com
---
 arch/arm/kvm/Kconfig   | 1 +
 arch/arm64/kvm/Kconfig | 1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index bfb915d..7d38d25 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -31,6 +31,7 @@ config KVM
select KVM_VFIO
select HAVE_KVM_EVENTFD
select HAVE_KVM_IRQFD
+   select IRQ_BYPASS_MANAGER
depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER
---help---
  Support hosting virtualized guest machines.
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index bfffe8f..dfaca85 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -31,6 +31,7 @@ config KVM
select KVM_VFIO
select HAVE_KVM_EVENTFD
select HAVE_KVM_IRQFD
+   select IRQ_BYPASS_MANAGER
---help---
  Support hosting virtualized guest machines.
 
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v5 06/19] KVM: Make struct kvm_irq_routing_table accessible

2015-07-13 Thread Feng Wu
Move struct kvm_irq_routing_table from irqchip.c to kvm_host.h,
so we can use it outside of irqchip.c.

Signed-off-by: Feng Wu feng...@intel.com
---
 include/linux/kvm_host.h | 15 +++
 virt/kvm/irqchip.c   | 11 ---
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ad45054..f591f7c 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -321,6 +321,21 @@ struct kvm_kernel_irq_routing_entry {
struct hlist_node link;
 };
 
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+
+struct kvm_irq_routing_table {
+   int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
+   struct kvm_kernel_irq_routing_entry *rt_entries;
+   u32 nr_rt_entries;
+   /*
+* Array indexed by gsi. Each entry contains list of irq chips
+* the gsi is connected to.
+*/
+   struct hlist_head map[0];
+};
+
+#endif
+
 #ifndef KVM_PRIVATE_MEM_SLOTS
 #define KVM_PRIVATE_MEM_SLOTS 0
 #endif
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 1d56a90..bac3b52 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -31,17 +31,6 @@
 #include trace/events/kvm.h
 #include irq.h
 
-struct kvm_irq_routing_table {
-   int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
-   struct kvm_kernel_irq_routing_entry *rt_entries;
-   u32 nr_rt_entries;
-   /*
-* Array indexed by gsi. Each entry contains list of irq chips
-* the gsi is connected to.
-*/
-   struct hlist_head map[0];
-};
-
 int kvm_irq_map_gsi(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *entries, int gsi)
 {
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v5 13/19] KVM: x86: Update IRTE for posted-interrupts

2015-07-13 Thread Feng Wu
This patch adds the routine to update IRTE for posted-interrupts
when guest changes the interrupt configuration.

Signed-off-by: Feng Wu feng...@intel.com
---
 arch/x86/kvm/x86.c | 73 ++
 1 file changed, 73 insertions(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 26eaeb5..d81ac02 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -63,6 +63,7 @@
 #include asm/fpu/internal.h /* Ugh! */
 #include asm/pvclock.h
 #include asm/div64.h
+#include asm/irq_remapping.h
 
 #define MAX_IO_MSRS 256
 #define KVM_MAX_MCE_BANKS 32
@@ -7950,6 +7951,78 @@ bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
 }
 EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
 
+/*
+ * kvm_arch_update_pi_irte - set IRTE for Posted-Interrupts
+ *
+ * @kvm: kvm
+ * @host_irq: host irq of the interrupt
+ * @guest_irq: gsi of the interrupt
+ * @set: set or unset PI
+ * returns 0 on success, < 0 on failure
+ */
+int kvm_arch_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
+   uint32_t guest_irq, bool set)
+{
+   struct kvm_kernel_irq_routing_entry *e;
+   struct kvm_irq_routing_table *irq_rt;
+   struct kvm_lapic_irq irq;
+   struct kvm_vcpu *vcpu;
+   struct vcpu_data vcpu_info;
+   int idx, ret = -EINVAL;
+
+   if (!irq_remapping_cap(IRQ_POSTING_CAP))
+   return 0;
+
+   idx = srcu_read_lock(&kvm->irq_srcu);
+   irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
+   BUG_ON(guest_irq >= irq_rt->nr_rt_entries);
+
+   hlist_for_each_entry(e, irq_rt-map[guest_irq], link) {
+   if (e-type != KVM_IRQ_ROUTING_MSI)
+   continue;
+   /*
+* VT-d PI cannot support posting multicast/broadcast
+* interrupts to a VCPU, we still use interrupt remapping
+* for these kind of interrupts.
+*
+* For lowest-priority interrupts, we only support
+* those with single CPU as the destination, e.g. user
+* configures the interrupts via /proc/irq or uses
+* irqbalance to make the interrupts single-CPU.
+*
+* We will support full lowest-priority interrupt later.
+*
+*/
+
+   kvm_set_msi_irq(e, irq);
+   if (!kvm_intr_is_single_vcpu(kvm, irq, vcpu))
+   continue;
+
+   vcpu_info.pi_desc_addr = kvm_x86_ops-get_pi_desc_addr(vcpu);
+   vcpu_info.vector = irq.vector;
+
+   if (set)
+   ret = irq_set_vcpu_affinity(host_irq, vcpu_info);
+   else {
+   /* suppress notification event before unposting */
+   kvm_x86_ops-pi_set_sn(vcpu);
+   ret = irq_set_vcpu_affinity(host_irq, NULL);
+   kvm_x86_ops-pi_clear_sn(vcpu);
+   }
+
+   if (ret < 0) {
+   printk(KERN_INFO "%s: failed to update PI IRTE\n",
+   __func__);
+   goto out;
+   }
+   }
+
+   ret = 0;
+out:
+   srcu_read_unlock(kvm-irq_srcu, idx);
+   return ret;
+}
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v5 19/19] KVM: Warn if 'SN' is set during posting interrupts by software

2015-07-13 Thread Feng Wu
Currently, we don't support urgent interrupts; all interrupts
are recognized as non-urgent interrupts, so we cannot post
interrupts when 'SN' is set.

If the vcpu is in guest mode, it cannot have been scheduled out,
and that's the only case when SN is set currently, so warn if
SN is set.

Signed-off-by: Feng Wu feng...@intel.com
---
 arch/x86/kvm/vmx.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index cecd018..d4d5abc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4484,6 +4484,22 @@ static inline bool 
kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_SMP
if (vcpu-mode == IN_GUEST_MODE) {
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+   /*
+* Currently, we don't support urgent interrupt,
+* all interrupts are recognized as non-urgent
+* interrupt, so we cannot post interrupts when
+* 'SN' is set.
+*
+* If the vcpu is in guest mode, it means it is
+* running instead of being scheduled out and
+* waiting in the run queue, and that's the only
+* case when 'SN' is set currently, warning if
+* 'SN' is set.
+*/
+   WARN_ON_ONCE(pi_test_sn(vmx-pi_desc));
+
apic-send_IPI_mask(get_cpu_mask(vcpu-cpu),
POSTED_INTR_VECTOR);
return true;
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v5 04/19] KVM: Get Posted-Interrupts descriptor address from struct kvm_vcpu

2015-07-13 Thread Feng Wu
Define an interface to get PI descriptor address from the vCPU structure.

Signed-off-by: Feng Wu feng...@intel.com
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/vmx.c  | 11 +++
 2 files changed, 13 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b8832e5..9df0724 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -836,6 +836,8 @@ struct kvm_x86_ops {
void (*enable_log_dirty_pt_masked)(struct kvm *kvm,
   struct kvm_memory_slot *slot,
   gfn_t offset, unsigned long mask);
+
+   u64 (*get_pi_desc_addr)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_arch_async_pf {
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1e815b6..1b33e33 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -609,6 +609,10 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu 
*vcpu)
 #define FIELD64(number, name)  [number] = VMCS12_OFFSET(name), \
[number##_HIGH] = VMCS12_OFFSET(name)+4
 
+struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
+{
+   return (to_vmx(vcpu)-pi_desc);
+}
 
 static unsigned long shadow_read_only_fields[] = {
/*
@@ -4494,6 +4498,11 @@ static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu 
*vcpu)
return;
 }
 
+static u64 vmx_get_pi_desc_addr(struct kvm_vcpu *vcpu)
+{
+   return __pa((u64)vcpu_to_pi_desc(vcpu));
+}
+
 /*
  * Set up the vmcs's constant host-state fields, i.e., host-state fields that
  * will not change in the lifetime of the guest.
@@ -10296,6 +10305,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
.slot_disable_log_dirty = vmx_slot_disable_log_dirty,
.flush_log_dirty = vmx_flush_log_dirty,
.enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
+
+   .get_pi_desc_addr = vmx_get_pi_desc_addr,
 };
 
 static int __init vmx_init(void)
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v5 00/19] Add VT-d Posted-Interrupts support

2015-07-13 Thread Feng Wu
VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
intervention when guest is running in non-root mode.

You can find the VT-d Posted-Interrupts Spec. at the following URL:
http://www.intel.com/content/www/us/en/intelligent-systems/intel-technology/vt-directed-io-spec.html

This series was part of http://thread.gmane.org/gmane.linux.kernel.iommu/7708. 
To make things clear, send out IOMMU part here.

This patch-set is based on the latest x86/apic branch of tip tree.

The whole series, which contains multiple components, is divided into three parts:
- Prerequisite changes to irq subsystem (already merged)
- IOMMU part (already merged)
- KVM and VFIO parts (this series)

v5:
- Based on Alex and Eric's irq bypass manager:
https://lkml.org/lkml/2015/7/10/663
- Reuse some common patch from Eric

Eric Auger (3):
  KVM: create kvm_irqfd.h
  KVM: eventfd: add irq bypass information in irqfd
  KVM: eventfd: add irq bypass consumer management

Feng Wu (16):
  KVM: Extend struct pi_desc for VT-d Posted-Interrupts
  KVM: Add some helper functions for Posted-Interrupts
  KVM: Define a new interface kvm_intr_is_single_vcpu()
  KVM: Get Posted-Interrupts descriptor address from struct kvm_vcpu
  KVM: Add interfaces to control PI outside vmx
  KVM: Make struct kvm_irq_routing_table accessible
  KVM: make kvm_set_msi_irq() public
  vfio: Select IRQ_BYPASS_MANAGER for vfio PCI devices
  vfio: Register/unregister irq_bypass_producer
  KVM, x86: Select IRQ_BYPASS_MANAGER for KVM_INTEL
  KVM: x86: Update IRTE for posted-interrupts
  KVM: x86: Add arch specific routines for irqbypass manager
  KVM: Add an arch specific hooks in 'struct kvm_kernel_irqfd'
  KVM: Update Posted-Interrupts Descriptor when vCPU is preempted
  KVM: Update Posted-Interrupts Descriptor when vCPU is blocked
  KVM: Warn if 'SN' is set during posting interrupts by software

 arch/x86/include/asm/kvm_host.h |  15 ++
 arch/x86/kvm/Kconfig|   1 +
 arch/x86/kvm/irq_comm.c |  28 +++-
 arch/x86/kvm/vmx.c  | 278 +++-
 arch/x86/kvm/x86.c  | 160 +++--
 drivers/vfio/pci/Kconfig|   1 +
 drivers/vfio/pci/vfio_pci_intrs.c   |  19 +++
 drivers/vfio/pci/vfio_pci_private.h |   2 +
 include/linux/kvm_host.h|  23 +++
 include/linux/kvm_irqfd.h   |  74 ++
 virt/kvm/eventfd.c  | 115 ++-
 virt/kvm/irqchip.c  |  11 --
 virt/kvm/kvm_main.c |   3 +
 13 files changed, 632 insertions(+), 98 deletions(-)
 create mode 100644 include/linux/kvm_irqfd.h

-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v5 08/19] vfio: Select IRQ_BYPASS_MANAGER for vfio PCI devices

2015-07-13 Thread Feng Wu
Enable irq bypass manager for vfio PCI devices.

Signed-off-by: Feng Wu feng...@intel.com
---
 drivers/vfio/pci/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
index 579d83b..02912f1 100644
--- a/drivers/vfio/pci/Kconfig
+++ b/drivers/vfio/pci/Kconfig
@@ -2,6 +2,7 @@ config VFIO_PCI
tristate VFIO support for PCI devices
depends on VFIO  PCI  EVENTFD
select VFIO_VIRQFD
+   select IRQ_BYPASS_MANAGER
help
  Support for the PCI VFIO bus driver.  This is required to make
  use of PCI drivers using the VFIO framework.
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v5 02/19] KVM: Add some helper functions for Posted-Interrupts

2015-07-13 Thread Feng Wu
This patch adds some helper functions to manipulate the
Posted-Interrupts Descriptor.

Signed-off-by: Feng Wu feng...@intel.com
---
 arch/x86/kvm/vmx.c | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 765539e..1e815b6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -442,6 +442,8 @@ struct nested_vmx {
 };
 
 #define POSTED_INTR_ON  0
+#define POSTED_INTR_SN  1
+
 /* Posted-Interrupt Descriptor */
 struct pi_desc {
u32 pir[8]; /* Posted interrupt requested */
@@ -482,6 +484,30 @@ static int pi_test_and_set_pir(int vector, struct pi_desc 
*pi_desc)
return test_and_set_bit(vector, (unsigned long *)pi_desc-pir);
 }
 
+static void pi_clear_sn(struct pi_desc *pi_desc)
+{
+   return clear_bit(POSTED_INTR_SN,
+   (unsigned long *)pi_desc-control);
+}
+
+static void pi_set_sn(struct pi_desc *pi_desc)
+{
+   return set_bit(POSTED_INTR_SN,
+   (unsigned long *)pi_desc-control);
+}
+
+static int pi_test_on(struct pi_desc *pi_desc)
+{
+   return test_bit(POSTED_INTR_ON,
+   (unsigned long *)pi_desc-control);
+}
+
+static int pi_test_sn(struct pi_desc *pi_desc)
+{
+   return test_bit(POSTED_INTR_SN,
+   (unsigned long *)pi_desc-control);
+}
+
 struct vcpu_vmx {
struct kvm_vcpu   vcpu;
unsigned long host_rsp;
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v5 01/19] KVM: Extend struct pi_desc for VT-d Posted-Interrupts

2015-07-13 Thread Feng Wu
Extend struct pi_desc for VT-d Posted-Interrupts.

Signed-off-by: Feng Wu feng...@intel.com
---
 arch/x86/kvm/vmx.c | 20 ++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e11dd59..765539e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -445,8 +445,24 @@ struct nested_vmx {
 /* Posted-Interrupt Descriptor */
 struct pi_desc {
u32 pir[8]; /* Posted interrupt requested */
-   u32 control;/* bit 0 of control is outstanding notification bit */
-   u32 rsvd[7];
+   union {
+   struct {
+   /* bit 256 - Outstanding Notification */
+   u16 on  : 1,
+   /* bit 257 - Suppress Notification */
+   sn  : 1,
+   /* bit 271:258 - Reserved */
+   rsvd_1  : 14;
+   /* bit 279:272 - Notification Vector */
+   u8  nv;
+   /* bit 287:280 - Reserved */
+   u8  rsvd_2;
+   /* bit 319:288 - Notification Destination */
+   u32 ndst;
+   };
+   u64 control;
+   };
+   u32 rsvd[6];
 } __aligned(64);
 
 static bool pi_test_and_set_on(struct pi_desc *pi_desc)
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v5 07/19] KVM: make kvm_set_msi_irq() public

2015-07-13 Thread Feng Wu
Make kvm_set_msi_irq() public so that it can be used outside irq_comm.c.

Signed-off-by: Feng Wu feng...@intel.com
---
 arch/x86/include/asm/kvm_host.h | 4 
 arch/x86/kvm/irq_comm.c | 4 ++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 739fd14..1b0278e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -175,6 +175,8 @@ enum {
  */
 #define KVM_APIC_PV_EOI_PENDING1
 
+struct kvm_kernel_irq_routing_entry;
+
 /*
  * We don't want allocation failures within the mmu code, so we preallocate
  * enough memory for a single page fault in a cache.
@@ -1187,4 +1189,6 @@ void kvm_deliver_pmi(struct kvm_vcpu *vcpu);
 
 bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
 struct kvm_vcpu **dest_vcpu);
+void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+struct kvm_lapic_irq *irq);
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 9e42645..58d7d49 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -94,8 +94,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct 
kvm_lapic *src,
return r;
 }
 
-static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
-  struct kvm_lapic_irq *irq)
+void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+struct kvm_lapic_irq *irq)
 {
trace_kvm_msi_set_irq(e-msi.address_lo, e-msi.data);
 
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v5 05/19] KVM: Add interfaces to control PI outside vmx

2015-07-13 Thread Feng Wu
This patch adds pi_clear_sn and pi_set_sn to struct kvm_x86_ops,
so we can set/clear SN outside vmx.

Signed-off-by: Feng Wu feng...@intel.com
---
 arch/x86/include/asm/kvm_host.h |  3 +++
 arch/x86/kvm/vmx.c  | 13 +
 2 files changed, 16 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9df0724..739fd14 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -838,6 +838,9 @@ struct kvm_x86_ops {
   gfn_t offset, unsigned long mask);
 
u64 (*get_pi_desc_addr)(struct kvm_vcpu *vcpu);
+
+   void (*pi_clear_sn)(struct kvm_vcpu *vcpu);
+   void (*pi_set_sn)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_arch_async_pf {
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1b33e33..35ef4c6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -614,6 +614,16 @@ struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
return (to_vmx(vcpu)-pi_desc);
 }
 
+static void vmx_pi_clear_sn(struct kvm_vcpu *vcpu)
+{
+   pi_clear_sn(vcpu_to_pi_desc(vcpu));
+}
+
+static void vmx_pi_set_sn(struct kvm_vcpu *vcpu)
+{
+   pi_set_sn(vcpu_to_pi_desc(vcpu));
+}
+
 static unsigned long shadow_read_only_fields[] = {
/*
 * We do NOT shadow fields that are modified when L0
@@ -10307,6 +10317,9 @@ static struct kvm_x86_ops vmx_x86_ops = {
.enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
 
.get_pi_desc_addr = vmx_get_pi_desc_addr,
+
+   .pi_clear_sn = vmx_pi_clear_sn,
+   .pi_set_sn = vmx_pi_set_sn,
 };
 
 static int __init vmx_init(void)
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v5 03/19] KVM: Define a new interface kvm_intr_is_single_vcpu()

2015-07-13 Thread Feng Wu
This patch defines a new interface, kvm_intr_is_single_vcpu(),
which returns whether the interrupt is for a single CPU or not.

It is used by VT-d PI, since for now we only support single-CPU
interrupts. For lowest-priority interrupts, if the user configures
it via /proc/irq or uses irqbalance to make it single-CPU, we
can use PI to deliver the interrupts to it. Full functionality
of lowest-priority support will be added later.

Signed-off-by: Feng Wu feng...@intel.com
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/irq_comm.c | 24 
 2 files changed, 26 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f8c0ec3..b8832e5 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1180,4 +1180,6 @@ int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, 
u64 *data);
 void kvm_handle_pmu_event(struct kvm_vcpu *vcpu);
 void kvm_deliver_pmi(struct kvm_vcpu *vcpu);
 
+bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
+struct kvm_vcpu **dest_vcpu);
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 72298b3..9e42645 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -299,6 +299,30 @@ out:
return r;
 }
 
+bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
+struct kvm_vcpu **dest_vcpu)
+{
+   int i, r = 0;
+   struct kvm_vcpu *vcpu;
+
+   kvm_for_each_vcpu(i, vcpu, kvm) {
+   if (!kvm_apic_present(vcpu))
+   continue;
+
+   if (!kvm_apic_match_dest(vcpu, NULL, irq-shorthand,
+   irq-dest_id, irq-dest_mode))
+   continue;
+
+   r++;
+   *dest_vcpu = vcpu;
+   }
+
+   if (r == 1)
+   return true;
+   else
+   return false;
+}
+
 #define IOAPIC_ROUTING_ENTRY(irq) \
{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,  \
  .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } }
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v5 09/19] vfio: Register/unregister irq_bypass_producer

2015-07-13 Thread Feng Wu
This patch adds the registration/unregistration of an
irq_bypass_producer for MSI/MSIx on vfio pci devices.

Signed-off-by: Feng Wu feng...@intel.com
---
 drivers/vfio/pci/vfio_pci_intrs.c   | 19 +++
 drivers/vfio/pci/vfio_pci_private.h |  2 ++
 2 files changed, 21 insertions(+)

diff --git a/drivers/vfio/pci/vfio_pci_intrs.c 
b/drivers/vfio/pci/vfio_pci_intrs.c
index 1f577b4..4795606 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -305,6 +305,16 @@ static int vfio_msi_enable(struct vfio_pci_device *vdev, 
int nvec, bool msix)
return 0;
 }
 
+void vfio_pci_add_consumer(struct irq_bypass_producer *prod,
+struct irq_bypass_consumer *cons)
+{
+}
+
+void vfio_pci_del_consumer(struct irq_bypass_producer *prod,
+struct irq_bypass_consumer *cons)
+{
+}
+
 static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
  int vector, int fd, bool msix)
 {
@@ -319,6 +329,7 @@ static int vfio_msi_set_vector_signal(struct 
vfio_pci_device *vdev,
 
if (vdev-ctx[vector].trigger) {
free_irq(irq, vdev-ctx[vector].trigger);
+   irq_bypass_unregister_producer(vdev-ctx[vector].producer);
kfree(vdev-ctx[vector].name);
eventfd_ctx_put(vdev-ctx[vector].trigger);
vdev-ctx[vector].trigger = NULL;
@@ -360,6 +371,14 @@ static int vfio_msi_set_vector_signal(struct 
vfio_pci_device *vdev,
return ret;
}
 
+   INIT_LIST_HEAD(vdev-ctx[vector].producer.node);
+   vdev-ctx[vector].producer.token = trigger;
+   vdev-ctx[vector].producer.irq = irq;
+   vdev-ctx[vector].producer.add_consumer = vfio_pci_add_consumer;
+   vdev-ctx[vector].producer.del_consumer = vfio_pci_del_consumer;
+   ret = irq_bypass_register_producer(vdev-ctx[vector].producer);
+   WARN_ON(ret);
+
vdev-ctx[vector].trigger = trigger;
 
return 0;
diff --git a/drivers/vfio/pci/vfio_pci_private.h 
b/drivers/vfio/pci/vfio_pci_private.h
index ae0e1b4..0e7394f 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -13,6 +13,7 @@
 
 #include linux/mutex.h
 #include linux/pci.h
+#include linux/irqbypass.h
 
 #ifndef VFIO_PCI_PRIVATE_H
 #define VFIO_PCI_PRIVATE_H
@@ -29,6 +30,7 @@ struct vfio_pci_irq_ctx {
struct virqfd   *mask;
char*name;
boolmasked;
+   struct irq_bypass_producer  producer;
 };
 
 struct vfio_pci_device {
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v5 16/19] KVM: Add an arch specific hooks in 'struct kvm_kernel_irqfd'

2015-07-13 Thread Feng Wu
This patch adds an arch-specific hook, 'arch_update', to
'struct kvm_kernel_irqfd'. On the Intel side, it is used to
update the IRTE when VT-d posted-interrupts are used.

Signed-off-by: Feng Wu feng...@intel.com
---
 arch/x86/kvm/x86.c|  5 +
 include/linux/kvm_host.h  |  3 +++
 include/linux/kvm_irqfd.h |  2 ++
 virt/kvm/eventfd.c| 13 -
 4 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 62bbafe..a88e659 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8063,6 +8063,11 @@ void kvm_arch_irq_consumer_init(struct 
irq_bypass_consumer *cons)
cons-del_producer = kvm_arch_irq_bypass_del_producer;
 }
 
+void kvm_arch_irqfd_init(struct kvm_kernel_irqfd *irqfd)
+{
+   irqfd-arch_update = kvm_arch_update_pi_irte;
+}
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index e693b3a..b37ebca 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -33,6 +33,8 @@
 
 #include asm/kvm_host.h
 
+struct kvm_kernel_irqfd;
+
 /*
  * The bit 16 ~ bit 31 of kvm_memory_region::flags are internally used
  * in kvm, other bits are visible for userspace which are defined in
@@ -1074,6 +1076,7 @@ extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
 extern struct kvm_device_ops kvm_arm_vgic_v3_ops;
 
 void kvm_arch_irq_consumer_init(struct irq_bypass_consumer *cons);
+void kvm_arch_irqfd_init(struct kvm_kernel_irqfd *irqfd);
 
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 
diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h
index cf9aad4..47a2696 100644
--- a/include/linux/kvm_irqfd.h
+++ b/include/linux/kvm_irqfd.h
@@ -67,6 +67,8 @@ struct kvm_kernel_irqfd {
struct work_struct shutdown;
struct irq_bypass_consumer consumer;
struct irq_bypass_producer *producer;
+   int (*arch_update)(struct kvm *kvm, unsigned int host_irq,
+  uint32_t guest_irq, bool set);
 };
 
 #endif /* __LINUX_KVM_IRQFD_H */
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 4225eea..762282c 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -276,6 +276,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
INIT_LIST_HEAD(irqfd-list);
INIT_WORK(irqfd-inject, irqfd_inject);
INIT_WORK(irqfd-shutdown, irqfd_shutdown);
+   kvm_arch_irqfd_init(irqfd);
seqcount_init(irqfd-irq_entry_sc);
 
f = fdget(args-fd);
@@ -562,13 +563,23 @@ kvm_irqfd_release(struct kvm *kvm)
  */
 void kvm_irq_routing_update(struct kvm *kvm)
 {
+   int ret;
struct kvm_kernel_irqfd *irqfd;
 
spin_lock_irq(kvm-irqfds.lock);
 
-   list_for_each_entry(irqfd, kvm-irqfds.items, list)
+   list_for_each_entry(irqfd, kvm-irqfds.items, list) {
irqfd_update(kvm, irqfd);
 
+   if (irqfd-arch_update) {
+   BUG_ON(!irqfd-producer);
+   ret = irqfd-arch_update(
+   irqfd-kvm, irqfd-producer-irq,
+   irqfd-gsi, 1);
+   WARN_ON(ret);
+   }
+   }
+
spin_unlock_irq(kvm-irqfds.lock);
 }
 
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v5 14/19] KVM: x86: Add arch specific routines for irqbypass manager

2015-07-13 Thread Feng Wu
Add the following x86-specific routines for the irqbypass manager:

- kvm_arch_irq_bypass_add_producer
- kvm_arch_irq_bypass_del_producer

Signed-off-by: Feng Wu feng...@intel.com
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/x86.c  | 40 
 include/linux/kvm_host.h|  2 ++
 3 files changed, 43 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1b0278e..6db761b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -24,6 +24,7 @@
 #include linux/perf_event.h
 #include linux/pvclock_gtod.h
 #include linux/clocksource.h
+#include linux/irqbypass.h
 
 #include asm/pvclock-abi.h
 #include asm/desc.h
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d81ac02..62bbafe 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -49,6 +49,8 @@
 #include linux/pci.h
 #include linux/timekeeper_internal.h
 #include linux/pvclock_gtod.h
+#include linux/kvm_irqfd.h
+#include linux/irqbypass.h
 #include trace/events/kvm.h
 
 #define CREATE_TRACE_POINTS
@@ -8023,6 +8025,44 @@ out:
return ret;
 }
 
+void kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
+ struct irq_bypass_producer *prod)
+{
+   int ret;
+   struct kvm_kernel_irqfd *irqfd =
+   container_of(cons, struct kvm_kernel_irqfd, consumer);
+
+   irqfd-producer = prod;
+
+   ret = kvm_arch_update_pi_irte(irqfd-kvm, prod-irq, irqfd-gsi, 1);
+   WARN_ON(ret);
+}
+
+void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
+ struct irq_bypass_producer *prod)
+{
+   int ret;
+   struct kvm_kernel_irqfd *irqfd =
+   container_of(cons, struct kvm_kernel_irqfd, consumer);
+
+   irqfd-producer = NULL;
+
+   /*
+* When producer of consumer is unregistered, we change back to
+* remapped mode, so we can re-use the current implementation
+* when the irq is masked/disabled or the consumer side (KVM
+* in this case) doesn't want to receive the interrupts.
+   */
+   ret = kvm_arch_update_pi_irte(irqfd-kvm, prod-irq, irqfd-gsi, 0);
+   WARN_ON(ret);
+}
+
+void kvm_arch_irq_consumer_init(struct irq_bypass_consumer *cons)
+{
+   cons-add_producer = kvm_arch_irq_bypass_add_producer;
+   cons-del_producer = kvm_arch_irq_bypass_del_producer;
+}
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f591f7c..e693b3a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1073,6 +1073,8 @@ extern struct kvm_device_ops kvm_xics_ops;
 extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
 extern struct kvm_device_ops kvm_arm_vgic_v3_ops;
 
+void kvm_arch_irq_consumer_init(struct irq_bypass_consumer *cons);
+
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 
 static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v5 15/19] KVM: eventfd: add irq bypass consumer management

2015-07-13 Thread Feng Wu
From: Eric Auger eric.au...@linaro.org

This patch adds the registration/unregistration of an
irq_bypass_consumer on irqfd assignment/deassignment.

Signed-off-by: Eric Auger eric.au...@linaro.org
Signed-off-by: Feng Wu feng...@intel.com
---
 virt/kvm/eventfd.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 647ffb8..4225eea 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -35,6 +35,7 @@
 #include linux/srcu.h
 #include linux/slab.h
 #include linux/seqlock.h
+#include linux/irqbypass.h
 #include trace/events/kvm.h
 
 #include kvm/iodev.h
@@ -140,6 +141,7 @@ irqfd_shutdown(struct work_struct *work)
/*
 * It is now safe to release the object's resources
 */
+   irq_bypass_unregister_consumer(irqfd-consumer);
eventfd_ctx_put(irqfd-eventfd);
kfree(irqfd);
 }
@@ -380,6 +382,11 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 */
fdput(f);
 
+   irqfd-consumer.token = (void *)irqfd-eventfd;
+   kvm_arch_irq_consumer_init(irqfd-consumer);
+   ret = irq_bypass_register_consumer(irqfd-consumer);
+   WARN_ON(ret);
+
return 0;
 
 fail:
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v5 18/19] KVM: Update Posted-Interrupts Descriptor when vCPU is blocked

2015-07-13 Thread Feng Wu
This patch updates the Posted-Interrupts Descriptor when vCPU
is blocked.

pre-block:
- Add the vCPU to the blocked per-CPU list
- Set 'NV' to POSTED_INTR_WAKEUP_VECTOR

post-block:
- Remove the vCPU from the per-CPU list

Signed-off-by: Feng Wu feng...@intel.com
---
 arch/x86/include/asm/kvm_host.h |   3 +
 arch/x86/kvm/vmx.c  | 158 
 arch/x86/kvm/x86.c  |  42 ---
 include/linux/kvm_host.h|   3 +
 virt/kvm/kvm_main.c |   3 +
 5 files changed, 199 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6db761b..68548d8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -844,6 +844,9 @@ struct kvm_x86_ops {
 
void (*pi_clear_sn)(struct kvm_vcpu *vcpu);
void (*pi_set_sn)(struct kvm_vcpu *vcpu);
+
+   int (*pi_pre_block)(struct kvm_vcpu *vcpu);
+   void (*pi_post_block)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_arch_async_pf {
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index dd6f3d5..cecd018 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -887,6 +887,13 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
 static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
 static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
 
+/*
+ * We maintain a per-CPU linked list of vCPUs, so in wakeup_handler() we
+ * can find which vCPU should be woken up.
+ */
+static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
+static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
+
 static unsigned long *vmx_io_bitmap_a;
 static unsigned long *vmx_io_bitmap_b;
 static unsigned long *vmx_msr_bitmap_legacy;
@@ -2971,6 +2978,8 @@ static int hardware_enable(void)
return -EBUSY;
 
INIT_LIST_HEAD(per_cpu(loaded_vmcss_on_cpu, cpu));
+   INIT_LIST_HEAD(per_cpu(blocked_vcpu_on_cpu, cpu));
+   spin_lock_init(per_cpu(blocked_vcpu_on_cpu_lock, cpu));
 
/*
 * Now we can enable the vmclear operation in kdump
@@ -6098,6 +6107,25 @@ static void update_ple_window_actual_max(void)
ple_window_grow, INT_MIN);
 }
 
+/*
+ * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
+ */
+static void wakeup_handler(void)
+{
+   struct kvm_vcpu *vcpu;
+   int cpu = smp_processor_id();
+
+   spin_lock(per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+   list_for_each_entry(vcpu, per_cpu(blocked_vcpu_on_cpu, cpu),
+   blocked_vcpu_list) {
+   struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+   if (pi_test_on(pi_desc) == 1)
+   kvm_vcpu_kick(vcpu);
+   }
+   spin_unlock(per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+}
+
 static __init int hardware_setup(void)
 {
int r = -ENOMEM, i, msr;
@@ -6282,6 +6310,8 @@ static __init int hardware_setup(void)
kvm_x86_ops-enable_log_dirty_pt_masked = NULL;
}
 
+   kvm_set_posted_intr_wakeup_handler(wakeup_handler);
+
return alloc_kvm_area();
 
 out8:
@@ -10235,6 +10265,131 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm 
*kvm,
kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
 }
 
+/*
+ * This routine does the following things for vCPU which is going
+ * to be blocked if VT-d PI is enabled.
+ * - Store the vCPU to the wakeup list, so when interrupts happen
+ *   we can find the right vCPU to wake up.
+ * - Change the Posted-interrupt descriptor as below:
+ *  'NDST' <-- vcpu->pre_pcpu
+ *  'NV' <-- POSTED_INTR_WAKEUP_VECTOR
+ * - If 'ON' is set during this process, which means at least one
+ *   interrupt is posted for this vCPU, we cannot block it, in
+ *   this case, return 1, otherwise, return 0.
+ *
+ */
+static int vmx_pi_pre_block(struct kvm_vcpu *vcpu)
+{
+   unsigned long flags;
+   unsigned int dest;
+   struct pi_desc old, new;
+   struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+   if (!irq_remapping_cap(IRQ_POSTING_CAP))
+   return 0;
+
+   vcpu-pre_pcpu = vcpu-cpu;
+   spin_lock_irqsave(per_cpu(blocked_vcpu_on_cpu_lock,
+ vcpu-pre_pcpu), flags);
+   list_add_tail(vcpu-blocked_vcpu_list,
+ per_cpu(blocked_vcpu_on_cpu,
+ vcpu-pre_pcpu));
+   spin_unlock_irqrestore(per_cpu(blocked_vcpu_on_cpu_lock,
+  vcpu-pre_pcpu), flags);
+
+   do {
+   old.control = new.control = pi_desc-control;
+
+   /*
+* We should not block the vCPU if
+* an interrupt is posted for it.
+*/
+   if (pi_test_on(pi_desc) == 1) {
+   spin_lock_irqsave(per_cpu(blocked_vcpu_on_cpu_lock,
+ vcpu-pre_pcpu), flags);
+   list_del(vcpu-blocked_vcpu_list

[v5 10/19] KVM, x86: Select IRQ_BYPASS_MANAGER for KVM_INTEL

2015-07-13 Thread Feng Wu
Enable irq bypass manager for kvm-intel.

Signed-off-by: Feng Wu feng...@intel.com
---
 arch/x86/kvm/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 921a8f9..be125bc 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -61,6 +61,7 @@ config KVM_INTEL
depends on KVM
# for perf_guest_get_msrs():
depends on CPU_SUP_INTEL
+   select IRQ_BYPASS_MANAGER
---help---
  Provides support for KVM on Intel processors equipped with the VT
  extensions.
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v5 12/19] KVM: eventfd: add irq bypass information in irqfd

2015-07-13 Thread Feng Wu
From: Eric Auger eric.au...@linaro.org

This patch adds the following new members in 'struct kvm_kernel_irqfd'
- struct irq_bypass_consumer consumer
- struct irq_bypass_producer *producer

Signed-off-by: Eric Auger eric.au...@linaro.org
Signed-off-by: Feng Wu feng...@intel.com
---
 include/linux/kvm_irqfd.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h
index f926b39..cf9aad4 100644
--- a/include/linux/kvm_irqfd.h
+++ b/include/linux/kvm_irqfd.h
@@ -17,6 +17,7 @@
 
 #include linux/kvm_host.h
 #include linux/poll.h
+#include linux/irqbypass.h
 
 /*
  * Resampling irqfds are a special variety of irqfds used to emulate
@@ -64,6 +65,8 @@ struct kvm_kernel_irqfd {
struct list_head list;
poll_table pt;
struct work_struct shutdown;
+   struct irq_bypass_consumer consumer;
+   struct irq_bypass_producer *producer;
 };
 
 #endif /* __LINUX_KVM_IRQFD_H */
-- 
2.1.0

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v5 11/19] KVM: create kvm_irqfd.h

2015-07-13 Thread Feng Wu
From: Eric Auger eric.au...@linaro.org

Move the _irqfd_resampler and _irqfd struct declarations into a new
public header: kvm_irqfd.h. They are renamed to
kvm_kernel_irqfd_resampler and kvm_kernel_irqfd, respectively. Those datatypes
will be used by architecture specific code, in the context of
IRQ bypass manager integration.

Signed-off-by: Eric Auger eric.au...@linaro.org
---
 include/linux/kvm_irqfd.h | 69 ++
 virt/kvm/eventfd.c| 95 ---
 2 files changed, 92 insertions(+), 72 deletions(-)
 create mode 100644 include/linux/kvm_irqfd.h

diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h
new file mode 100644
index 000..f926b39
--- /dev/null
+++ b/include/linux/kvm_irqfd.h
@@ -0,0 +1,69 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * irqfd: Allows an fd to be used to inject an interrupt to the guest
+ * Credit goes to Avi Kivity for the original idea.
+ */
+
+#ifndef __LINUX_KVM_IRQFD_H
+#define __LINUX_KVM_IRQFD_H
+
+#include linux/kvm_host.h
+#include linux/poll.h
+
+/*
+ * Resampling irqfds are a special variety of irqfds used to emulate
+ * level triggered interrupts.  The interrupt is asserted on eventfd
+ * trigger.  On acknowledgment through the irq ack notifier, the
+ * interrupt is de-asserted and userspace is notified through the
+ * resamplefd.  All resamplers on the same gsi are de-asserted
+ * together, so we don't need to track the state of each individual
+ * user.  We can also therefore share the same irq source ID.
+ */
+struct kvm_kernel_irqfd_resampler {
+   struct kvm *kvm;
+   /*
+* List of resampling struct _irqfd objects sharing this gsi.
+* RCU list modified under kvm->irqfds.resampler_lock
+*/
+   struct list_head list;
+   struct kvm_irq_ack_notifier notifier;
+   /*
+* Entry in list of kvm->irqfd.resampler_list.  Use for sharing
+* resamplers among irqfds on the same gsi.
+* Accessed and modified under kvm->irqfds.resampler_lock
+*/
+   struct list_head link;
+};
+
+struct kvm_kernel_irqfd {
+   /* Used for MSI fast-path */
+   struct kvm *kvm;
+   wait_queue_t wait;
+   /* Update side is protected by irqfds.lock */
+   struct kvm_kernel_irq_routing_entry irq_entry;
+   seqcount_t irq_entry_sc;
+   /* Used for level IRQ fast-path */
+   int gsi;
+   struct work_struct inject;
+   /* The resampler used by this irqfd (resampler-only) */
+   struct kvm_kernel_irqfd_resampler *resampler;
+   /* Eventfd notified on resample (resampler-only) */
+   struct eventfd_ctx *resamplefd;
+   /* Entry in list of irqfds for a resampler (resampler-only) */
+   struct list_head resampler_link;
+   /* Used for setup/shutdown */
+   struct eventfd_ctx *eventfd;
+   struct list_head list;
+   poll_table pt;
+   struct work_struct shutdown;
+};
+
+#endif /* __LINUX_KVM_IRQFD_H */
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 9ff4193..647ffb8 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -23,6 +23,7 @@
 
 #include linux/kvm_host.h
 #include linux/kvm.h
+#include linux/kvm_irqfd.h
 #include linux/workqueue.h
 #include linux/syscalls.h
 #include linux/wait.h
@@ -39,68 +40,14 @@
 #include kvm/iodev.h
 
 #ifdef CONFIG_HAVE_KVM_IRQFD
-/*
- * 
- * irqfd: Allows an fd to be used to inject an interrupt to the guest
- *
- * Credit goes to Avi Kivity for the original idea.
- * 
- */
-
-/*
- * Resampling irqfds are a special variety of irqfds used to emulate
- * level triggered interrupts.  The interrupt is asserted on eventfd
- * trigger.  On acknowledgement through the irq ack notifier, the
- * interrupt is de-asserted and userspace is notified through the
- * resamplefd.  All resamplers on the same gsi are de-asserted
- * together, so we don't need to track the state of each individual
- * user.  We can also therefore share the same irq source ID.
- */
-struct _irqfd_resampler {
-   struct kvm *kvm;
-   /*
-* List of resampling struct _irqfd objects sharing this gsi.
-* RCU list modified under kvm-irqfds.resampler_lock
-*/
-   struct list_head list;
-   struct kvm_irq_ack_notifier notifier;
-   /*
-* Entry in list of kvm-irqfd.resampler_list.  Use for sharing
-* resamplers among irqfds on the same gsi.
-* 

  1   2   3   >