From: Wanpeng Li <wanpen...@tencent.com>

Use a hypercall to send IPIs with a single vmexit, instead of one vmexit
per destination vCPU in xAPIC/x2APIC physical mode and one vmexit per
cluster in x2APIC cluster mode.

Even with QEMU interrupt remapping and PV TLB shootdown enabled, I still
observe a ~14% performance boost in the ebizzy benchmark on a 64-vCPU VM,
and the total number of MSR-induced vmexits drops by ~70%.

Cc: Paolo Bonzini <pbonz...@redhat.com>
Cc: Radim Krčmář <rkrc...@redhat.com>
Cc: Vitaly Kuznetsov <vkuzn...@redhat.com>
Signed-off-by: Wanpeng Li <wanpen...@tencent.com>
---
 Documentation/virtual/kvm/cpuid.txt      |  4 +++
 Documentation/virtual/kvm/hypercalls.txt |  6 +++++
 arch/x86/include/uapi/asm/kvm_para.h     |  1 +
 arch/x86/kvm/cpuid.c                     |  3 ++-
 arch/x86/kvm/x86.c                       | 42 ++++++++++++++++++++++++++++++++
 5 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt
index ab022dc..97ca194 100644
--- a/Documentation/virtual/kvm/cpuid.txt
+++ b/Documentation/virtual/kvm/cpuid.txt
@@ -62,6 +62,10 @@ KVM_FEATURE_ASYNC_PF_VMEXIT        ||    10 || paravirtualized async PF VM exit
                                    ||       || can be enabled by setting bit 2
                                    ||       || when writing to msr 0x4b564d02
 ------------------------------------------------------------------------------
+KVM_FEATURE_PV_SEND_IPI            ||    11 || guest checks this feature bit
+                                   ||       || before using paravirtualized
+                                   ||       || IPI sending (KVM_HC_SEND_IPI).
+------------------------------------------------------------------------------
 KVM_FEATURE_CLOCKSOURCE_STABLE_BIT ||    24 || host will warn if no guest-side
                                    ||       || per-cpu warps are expected in
                                    ||       || kvmclock.
diff --git a/Documentation/virtual/kvm/hypercalls.txt b/Documentation/virtual/kvm/hypercalls.txt
index a890529..a771ee8 100644
--- a/Documentation/virtual/kvm/hypercalls.txt
+++ b/Documentation/virtual/kvm/hypercalls.txt
@@ -121,3 +121,9 @@ compute the CLOCK_REALTIME for its clock, at the same instant.
 
 Returns KVM_EOPNOTSUPP if the host does not use TSC clocksource,
 or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK.
+
+6. KVM_HC_SEND_IPI
+------------------------
+Architecture: x86
+Status: active
+Purpose: Hypercall used to send IPIs.
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 0ede697..19980ec 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -28,6 +28,7 @@
 #define KVM_FEATURE_PV_UNHALT          7
 #define KVM_FEATURE_PV_TLB_FLUSH       9
 #define KVM_FEATURE_ASYNC_PF_VMEXIT    10
+#define KVM_FEATURE_PV_SEND_IPI        11
 
 #define KVM_HINTS_REALTIME      0
 
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 7e042e3..7bcfa61 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -621,7 +621,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
                             (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
                             (1 << KVM_FEATURE_PV_UNHALT) |
                             (1 << KVM_FEATURE_PV_TLB_FLUSH) |
-                            (1 << KVM_FEATURE_ASYNC_PF_VMEXIT);
+                            (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) |
+                            (1 << KVM_FEATURE_PV_SEND_IPI);
 
                if (sched_info_on())
                        entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0046aa7..c2cef21 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6689,6 +6689,45 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
        kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
 }
 
+/*
+ * Send the IPI in @icr to each vCPU whose physical APIC ID is set in the
+ * guest-supplied bitmap (@ipi_bitmap_high starts at APIC ID BITS_PER_LONG).
+ * IDs beyond the APIC map or with no entry in it are skipped.
+ * Return 0 if successfully added and 1 if discarded.
+ */
+static int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
+                       unsigned long ipi_bitmap_high, unsigned long icr)
+{
+       unsigned long ipi_bitmap[] = { ipi_bitmap_low, ipi_bitmap_high };
+       struct kvm_lapic_irq irq = {0};
+       struct kvm_apic_map *map;
+       int i, ret = 0;
+
+       /* As before, a request for NMI_VECTOR leaves irq.vector zeroed. */
+       if ((icr & APIC_VECTOR_MASK) != NMI_VECTOR)
+               irq.vector = icr & APIC_VECTOR_MASK;
+       irq.delivery_mode = icr & APIC_MODE_MASK;
+
+       rcu_read_lock();
+       map = rcu_dereference(kvm->arch.apic_map);
+       if (!map) {
+               ret = 1;        /* no APIC map: nothing can be delivered */
+               goto out;
+       }
+
+       for_each_set_bit(i, ipi_bitmap, 2 * BITS_PER_LONG) {
+               if (i > map->max_apic_id || !map->phys_map[i])
+                       continue;
+               if (!kvm_apic_set_irq(map->phys_map[i]->vcpu, &irq, NULL)) {
+                       ret = 1;        /* stop at the first discarded IPI */
+                       break;
+               }
+       }
+out:
+       rcu_read_unlock();      /* reached on every exit path */
+       return ret;
+}
+
 void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
 {
        vcpu->arch.apicv_active = false;
@@ -6737,6 +6776,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
        case KVM_HC_CLOCK_PAIRING:
                ret = kvm_pv_clock_pairing(vcpu, a0, a1);
                break;
+       case KVM_HC_SEND_IPI:
+               ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2);
+               break;
 #endif
        default:
                ret = -KVM_ENOSYS;
-- 
2.7.4

Reply via email to