Glauber Costa wrote:
> When we create a new vcpu, we need to make sure that
> all of the state it is going to use (apic state, for example)
> already exists. We can do it nicely by making sure kvm_init_vcpu
> is executed after everything else in cpu creation.
>
> After that, the first call to KVM_SET_LAPIC ioctl will not find an
> existant vcpu. So we introduce a function that tell us that the vcpu
> is already initialized, and is it safe to call the ioctl.
>
> We force the execution of the KVM_SET_LAPIC from within the new
> vcpu thread, that will replace this first initialization call.
>
> Signed-off-by: Glauber Costa <[email protected]>
> ---
> hw/apic.c | 21 +++++++++++----------
> hw/pc.c | 1 +
> qemu-kvm.c | 10 ++++++++++
> qemu-kvm.h | 4 ++++
> target-i386/helper.c | 2 --
> 5 files changed, 26 insertions(+), 12 deletions(-)
>
> diff --git a/hw/apic.c b/hw/apic.c
> index 466fb7e..b7cd18e 100644
> --- a/hw/apic.c
> +++ b/hw/apic.c
> @@ -891,6 +891,15 @@ static void kvm_kernel_lapic_load_from_user(APICState *s)
>
> #endif
>
> +void qemu_kvm_load_lapic(CPUState *env)
> +{
> +#ifdef KVM_CAP_IRQCHIP
> + if (kvm_enabled() && kvm_vcpu_inited(env) &&
> qemu_kvm_irqchip_in_kernel()) {
> + kvm_kernel_lapic_load_from_user(env->apic_state);
> + }
> +#endif
> +}
> +
> static void apic_save(QEMUFile *f, void *opaque)
> {
> APICState *s = opaque;
> @@ -965,11 +974,7 @@ static int apic_load(QEMUFile *f, void *opaque, int
> version_id)
> if (version_id >= 2)
> qemu_get_timer(f, s->timer);
>
> -#ifdef KVM_CAP_IRQCHIP
> - if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) {
> - kvm_kernel_lapic_load_from_user(s);
> - }
> -#endif
> + qemu_kvm_load_lapic(s->cpu_env);
>
> return 0;
> }
> @@ -991,11 +996,7 @@ static void apic_reset(void *opaque)
> */
> s->lvt[APIC_LVT_LINT0] = 0x700;
> }
> -#ifdef KVM_CAP_IRQCHIP
> - if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) {
> - kvm_kernel_lapic_load_from_user(s);
> - }
> -#endif
> + qemu_kvm_load_lapic(s->cpu_env);
> }
>
> static CPUReadMemoryFunc *apic_mem_read[3] = {
> diff --git a/hw/pc.c b/hw/pc.c
> index 34a4d25..1675510 100644
> --- a/hw/pc.c
> +++ b/hw/pc.c
> @@ -854,6 +854,7 @@ CPUState *pc_new_cpu(int cpu, const char *cpu_model, int
> pci_enabled)
> if (pci_enabled) {
> apic_init(env);
> }
> + qemu_init_vcpu(env);
> return env;
Yeah, it always looks funny when you patch mis-formatted code...
> }
>
> diff --git a/qemu-kvm.c b/qemu-kvm.c
> index 8c0d463..8fd80c1 100644
> --- a/qemu-kvm.c
> +++ b/qemu-kvm.c
> @@ -435,6 +435,9 @@ static void *ap_main_loop(void *_env)
> kvm_create_vcpu(kvm_context, env->cpu_index);
> kvm_qemu_init_env(env);
>
> + /* APIC state creation takes place before we get here. So despite the
> fact that
> + * apic_reset() (called by apic_init) will also load the apic state, we
> have to redo it here
> + */
> #ifdef USE_KVM_DEVICE_ASSIGNMENT
> /* do ioperm for io ports of assigned devices */
> LIST_FOREACH(data, &ioperm_head, entries)
> @@ -446,6 +449,8 @@ static void *ap_main_loop(void *_env)
> current_env->kvm_cpu_state.created = 1;
> pthread_cond_signal(&qemu_vcpu_cond);
>
> + qemu_kvm_load_lapic(env);
> +
This feels strange after a first glance, I need to look closer... Ah
wait, found one reason for this feeling: APIC is x86 stuff, but you are
patching generic code.
> /* and wait for machine initialization */
> while (!qemu_system_ready)
> qemu_cond_wait(&qemu_system_cond);
> @@ -463,6 +468,11 @@ void kvm_init_vcpu(CPUState *env)
> qemu_cond_wait(&qemu_vcpu_cond);
> }
>
> +int kvm_vcpu_inited(CPUState *env)
> +{
> + return env->kvm_cpu_state.created;
> +}
> +
> int kvm_init_ap(void)
> {
> #ifdef TARGET_I386
> diff --git a/qemu-kvm.h b/qemu-kvm.h
> index c0549df..6fa9d5a 100644
> --- a/qemu-kvm.h
> +++ b/qemu-kvm.h
> @@ -16,6 +16,7 @@ int kvm_main_loop(void);
> int kvm_qemu_init(void);
> int kvm_qemu_create_context(void);
> int kvm_init_ap(void);
> +int kvm_vcpu_inited(CPUState *env);
> void kvm_qemu_destroy(void);
> void kvm_load_registers(CPUState *env);
> void kvm_save_registers(CPUState *env);
> @@ -31,6 +32,9 @@ int kvm_update_guest_debug(CPUState *env, unsigned long
> reinject_trap);
> int kvm_qemu_init_env(CPUState *env);
> int kvm_qemu_check_extension(int ext);
> void kvm_apic_init(CPUState *env);
> +/* called from vcpu initialization */
> +void qemu_kvm_load_lapic(CPUState *env);
> +
> int kvm_set_irq(int irq, int level, int *status);
>
> int kvm_physical_memory_set_dirty_tracking(int enable);
> diff --git a/target-i386/helper.c b/target-i386/helper.c
> index 719e31e..511b48c 100644
> --- a/target-i386/helper.c
> +++ b/target-i386/helper.c
> @@ -1696,7 +1696,5 @@ CPUX86State *cpu_x86_init(const char *cpu_model)
> kqemu_init(env);
> #endif
>
> - qemu_init_vcpu(env);
> -
> return env;
> }
The reordering of qemu_init_vcpu could also simplify reset management (I
have a patch pending that adds a kvm hook to apic reset for solving it
within the existing scheme). But I would suggest to get an ack from
upstream first, or better even merge this pattern there and then adjust
qemu-kvm. The other way around is calling for troubles if qemu sticks
with a different approach.
Jan
--
Siemens AG, Corporate Technology, CT SE 2
Corporate Competence Center Embedded Linux
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html