[PATCH v2 6/6] kvm: i386: irqchip: take BQL only if there is an interrupt

Igor Mammedov Wed, 30 Jul 2025 07:11:53 -0700

When kernel-irqchip=split is used, QEMU still hits a BQL
contention issue when reading ACPI PM/HPET timers
(despite timer[s] access being lock-less).


So Windows with more than 255 cpus is still not able to
boot (since it requires iommu -> split irqchip).

Problematic path is in kvm_arch_pre_run() where BQL is taken
unconditionally when split irqchip is in use.

There are a few parts that BQL protects there:
  1. interrupt check and injecting

    However, we do not take the BQL when checking for a pending
    interrupt (even within the same function), so the patch
    takes the same approach for the cpu->interrupt_request checks
    and takes the BQL only if there is a job to do.

  2. request_interrupt_window access
      CPUState::kvm_run::request_interrupt_window doesn't need BQL
      as it's accessed on the QEMU side only by its own vCPU thread.
      The only thing that BQL provides there is an implicit barrier,
      which can be replaced by a cheaper explicit barrier there.

  3. cr8/cpu_get_apic_tpr access
      the same (as #2) applies to CPUState::kvm_run::cr8 write,
      and APIC registers are also cached/synced (get/put) within
      the vCPU thread it belongs to.

Taking the BQL only when necessary eliminates the BQL bottleneck on
the IO/MMIO-only exit path, improving latency by 80% on the HPET micro
benchmark.

This lets Windows boot successfully (in case hv-time isn't used)
when more than 255 vCPUs are in use.

Signed-off-by: Igor Mammedov <imamm...@redhat.com>
---
 target/i386/kvm/kvm.c | 58 +++++++++++++++++++++++++++----------------
 1 file changed, 37 insertions(+), 21 deletions(-)

diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 369626f8c8..32024d50f5 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -5450,6 +5450,7 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
 {
     X86CPU *x86_cpu = X86_CPU(cpu);
     CPUX86State *env = &x86_cpu->env;
+    bool release_bql = 0;
     int ret;
 
     /* Inject NMI */
@@ -5478,15 +5479,16 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
         }
     }
 
-    if (!kvm_pic_in_kernel()) {
-        bql_lock();
-    }
 
     /* Force the VCPU out of its inner loop to process any INIT requests
      * or (for userspace APIC, but it is cheap to combine the checks here)
      * pending TPR access reports.
      */
     if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
+        if (!kvm_pic_in_kernel()) {
+            bql_lock();
+            release_bql = true;
+        }
         if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) &&
             !(env->hflags & HF_SMM_MASK)) {
             cpu->exit_request = 1;
@@ -5497,24 +5499,31 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
     }
 
     if (!kvm_pic_in_kernel()) {
-        /* Try to inject an interrupt if the guest can accept it */
-        if (run->ready_for_interrupt_injection &&
-            (cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
-            (env->eflags & IF_MASK)) {
-            int irq;
-
-            cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
-            irq = cpu_get_pic_interrupt(env);
-            if (irq >= 0) {
-                struct kvm_interrupt intr;
-
-                intr.irq = irq;
-                DPRINTF("injected interrupt %d\n", irq);
-                ret = kvm_vcpu_ioctl(cpu, KVM_INTERRUPT, &intr);
-                if (ret < 0) {
-                    fprintf(stderr,
-                            "KVM: injection failed, interrupt lost (%s)\n",
-                            strerror(-ret));
+        if (cpu->interrupt_request & CPU_INTERRUPT_HARD) {
+            if (!release_bql) {
+                bql_lock();
+                release_bql = true;
+            }
+
+            /* Try to inject an interrupt if the guest can accept it */
+            if (run->ready_for_interrupt_injection &&
+                (cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
+                (env->eflags & IF_MASK)) {
+                int irq;
+
+                cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
+                irq = cpu_get_pic_interrupt(env);
+                if (irq >= 0) {
+                    struct kvm_interrupt intr;
+
+                    intr.irq = irq;
+                    DPRINTF("injected interrupt %d\n", irq);
+                    ret = kvm_vcpu_ioctl(cpu, KVM_INTERRUPT, &intr);
+                    if (ret < 0) {
+                        fprintf(stderr,
+                                "KVM: injection failed, interrupt lost (%s)\n",
+                                strerror(-ret));
+                    }
                 }
             }
         }
@@ -5531,7 +5540,14 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
 
         DPRINTF("setting tpr\n");
         run->cr8 = cpu_get_apic_tpr(x86_cpu->apic_state);
+        /*
+         * make sure that request_interrupt_window/cr8 are set
+         * before KVM_RUN might read them
+         */
+        smp_mb();
+    }
 
+    if (release_bql) {
         bql_unlock();
     }
 }
-- 
2.47.1

[PATCH v2 6/6] kvm: i386: irqchip: take BQL only if there is an interrupt

Reply via email to