On Fri, 29 Aug 2025 17:31:13 +0200 Paolo Bonzini <pbonz...@redhat.com> wrote:
> Make the code common to all accelerators: after seeing cpu->exit_request
> set to true, accelerator code needs to reach qemu_wait_io_event_common().
>
> So for the common cases where they use qemu_wait_io_event(), go ahead and
> clear it in there.  Note that the cheap qatomic_set() is enough because
> at this point the thread has taken the BQL; qatomic_set_mb() is not needed.
> In particular, the ordering of the communication between I/O and vCPU
> threads is always the same.
>
> In the I/O thread:
>
>   (a) store other memory locations that will be checked if cpu->exit_request
>       or cpu->interrupt_request is 1 (for example cpu->stop or cpu->work_list
>       for cpu->exit_request)
>
>   (b) cpu_exit(): store-release cpu->exit_request, or
>   (b) cpu_interrupt(): store-release cpu->interrupt_request
>
>       >>> at this point, cpu->halt_cond is broadcast and the BQL released
>
>   (c) do the accelerator-specific kick (e.g. write icount_decr for TCG,
>       pthread_kill for KVM, etc.)
>
> In the vCPU thread, instead, the opposite order is respected:
>
>   (c) the accelerator's execution loop exits thanks to the kick
>
>   (b) then the inner execution loop checks cpu->interrupt_request
>       and cpu->exit_request.  If needed, cpu->interrupt_request is
>       converted into cpu->exit_request when work is needed outside
>       the execution loop.
>
>   (a) then the other memory locations are checked.  Some may need to
>       be read under the BQL, but the vCPU thread may also take other
>       locks (e.g. for queued work items) or none at all.
>
> qatomic_set_mb() would only be needed if the halt sleep were done
> outside the BQL (though in that case, cpu->exit_request would
> probably be replaced by a QemuEvent or something like that).
>
> Signed-off-by: Paolo Bonzini <pbonz...@redhat.com>

Reviewed-by: Igor Mammedov <imamm...@redhat.com>

> ---
>  accel/kvm/kvm-all.c          | 2 --
>  accel/tcg/cpu-exec.c         | 1 -
>  accel/tcg/tcg-accel-ops-rr.c | 9 +++++++--
>  accel/tcg/tcg-accel-ops.c    | 2 --
>  accel/tcg/user-exec.c        | 1 +
>  system/cpus.c                | 1 +
>  target/i386/nvmm/nvmm-all.c  | 2 --
>  target/i386/whpx/whpx-all.c  | 2 --
>  8 files changed, 9 insertions(+), 11 deletions(-)
>
> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> index e4167d94b4f..d13156bee87 100644
> --- a/accel/kvm/kvm-all.c
> +++ b/accel/kvm/kvm-all.c
> @@ -3155,7 +3155,6 @@ int kvm_cpu_exec(CPUState *cpu)
>      trace_kvm_cpu_exec();
>
>      if (kvm_arch_process_async_events(cpu)) {
> -        qatomic_set(&cpu->exit_request, 0);
>          return EXCP_HLT;
>      }
>
> @@ -3345,7 +3344,6 @@ int kvm_cpu_exec(CPUState *cpu)
>          vm_stop(RUN_STATE_INTERNAL_ERROR);
>      }
>
> -    qatomic_set(&cpu->exit_request, 0);
>      return ret;
>  }
>
> diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
> index 3ae545e888f..ad94f96b252 100644
> --- a/accel/tcg/cpu-exec.c
> +++ b/accel/tcg/cpu-exec.c
> @@ -872,7 +872,6 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
>       * The corresponding store-release is in cpu_exit.
>       */
>      if (unlikely(qatomic_load_acquire(&cpu->exit_request)) || icount_exit_request(cpu)) {
> -        qatomic_set(&cpu->exit_request, 0);
>          if (cpu->exception_index == -1) {
>              cpu->exception_index = EXCP_INTERRUPT;
>          }
> diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c
> index 610292d3bac..e9d291dc391 100644
> --- a/accel/tcg/tcg-accel-ops-rr.c
> +++ b/accel/tcg/tcg-accel-ops-rr.c
> @@ -286,8 +286,13 @@ static void *rr_cpu_thread_fn(void *arg)
>          /* Does not need a memory barrier because a spurious wakeup is okay.  */
>          qatomic_set(&rr_current_cpu, NULL);
>
> -        if (cpu && qatomic_read(&cpu->exit_request)) {
> -            qatomic_set_mb(&cpu->exit_request, 0);
> +        if (cpu) {
> +            /*
> +             * This could even reset exit_request for all CPUs, but in practice
> +             * races between CPU exits and changes to "cpu" are so rare that
> +             * there's no advantage in doing so.
> +             */
> +            qatomic_set(&cpu->exit_request, false);
>          }
>
>          if (icount_enabled() && all_cpu_threads_idle()) {
> diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c
> index 1f662a9c745..3bd98005042 100644
> --- a/accel/tcg/tcg-accel-ops.c
> +++ b/accel/tcg/tcg-accel-ops.c
> @@ -82,8 +82,6 @@ int tcg_cpu_exec(CPUState *cpu)
>      ret = cpu_exec(cpu);
>      cpu_exec_end(cpu);
>
> -    qatomic_set_mb(&cpu->exit_request, 0);
> -
>      return ret;
>  }
>
> diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
> index 81906d2e033..8f4f049b924 100644
> --- a/accel/tcg/user-exec.c
> +++ b/accel/tcg/user-exec.c
> @@ -54,6 +54,7 @@ void qemu_cpu_kick(CPUState *cpu)
>
>  void qemu_wait_io_event(CPUState *cpu)
>  {
> +    qatomic_set(&cpu->exit_request, false);
>      process_queued_cpu_work(cpu);
>  }
>
> diff --git a/system/cpus.c b/system/cpus.c
> index bb13942cbb7..f989d9938b6 100644
> --- a/system/cpus.c
> +++ b/system/cpus.c
> @@ -463,6 +463,7 @@ void qemu_wait_io_event(CPUState *cpu)
>  {
>      bool slept = false;
>
> +    qatomic_set(&cpu->exit_request, false);
>      while (cpu_thread_is_idle(cpu)) {
>          if (!slept) {
>              slept = true;
> diff --git a/target/i386/nvmm/nvmm-all.c b/target/i386/nvmm/nvmm-all.c
> index 7e36c42fbb4..ed424251673 100644
> --- a/target/i386/nvmm/nvmm-all.c
> +++ b/target/i386/nvmm/nvmm-all.c
> @@ -817,8 +817,6 @@ nvmm_vcpu_loop(CPUState *cpu)
>      cpu_exec_end(cpu);
>      bql_lock();
>
> -    qatomic_set(&cpu->exit_request, false);
> -
>      return ret < 0;
>  }
>
> diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c
> index 00fb7e23100..2a85168ed51 100644
> --- a/target/i386/whpx/whpx-all.c
> +++ b/target/i386/whpx/whpx-all.c
> @@ -2050,8 +2050,6 @@ static int whpx_vcpu_run(CPUState *cpu)
>          whpx_last_vcpu_stopping(cpu);
>      }
>
> -    qatomic_set(&cpu->exit_request, false);
> -
>      return ret < 0;
>  }
>
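
As an aside for anyone following the (a)/(b)/(c) ordering above: it maps
directly onto C11 release/acquire semantics.  Below is a minimal,
self-contained sketch of the handshake.  This is not QEMU code: plain C11
atomics and a pthread mutex stand in for qatomic_*(), the BQL and
qemu_cpu_kick(), and all names (fake_io_thread, fake_vcpu_thread,
stop_work) are invented for illustration.

    /*
     * Toy model of the exit_request handshake described above.
     * Build with:  cc -pthread sketch.c
     */
    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t bql = PTHREAD_MUTEX_INITIALIZER; /* stands in for the BQL */
    static bool stop_work;            /* (a): plain data, protected by "bql" */
    static atomic_bool exit_request;  /* (b): release/acquire pair */

    static void *fake_io_thread(void *arg)
    {
        pthread_mutex_lock(&bql);
        stop_work = true;                              /* (a) store the real work */
        atomic_store_explicit(&exit_request, true,
                              memory_order_release);   /* (b) store-release flag */
        pthread_mutex_unlock(&bql);
        /* (c) the accelerator-specific kick would go here (pthread_kill, etc.) */
        return NULL;
    }

    static void *fake_vcpu_thread(void *arg)
    {
        /* (c) pretend the kick already bounced us out of the execution loop */
        while (!atomic_load_explicit(&exit_request,
                                     memory_order_acquire)) { /* (b) load-acquire */
            /* spin; a real vCPU thread would be inside the accelerator loop */
        }
        pthread_mutex_lock(&bql);
        /*
         * A plain (relaxed) store is enough here: the lock is held, as in
         * qemu_wait_io_event() after this patch.
         */
        atomic_store_explicit(&exit_request, false, memory_order_relaxed);
        printf("vcpu saw stop_work=%d\n", stop_work); /* (a) guaranteed visible */
        pthread_mutex_unlock(&bql);
        return NULL;
    }

    int main(void)
    {
        pthread_t io, vcpu;
        pthread_create(&vcpu, NULL, fake_vcpu_thread, NULL);
        pthread_create(&io, NULL, fake_io_thread, NULL);
        pthread_join(io, NULL);
        pthread_join(vcpu, NULL);
        return 0;
    }

The load-acquire pairs with the store-release, so once fake_vcpu_thread
observes exit_request == true it is also guaranteed to see stop_work ==
true; and clearing the flag needs no extra barrier because the lock already
orders it against the subsequent reads, which is exactly the argument the
commit message makes for dropping qatomic_set_mb().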