This patch enables INIT/SIPI handling using in-kernel APIC by introducing a mp_state to emulate the SMP state transition.
Signed-off-by: Qing He <[EMAIL PROTECTED]>
Signed-off-by: Xin Li <[EMAIL PROTECTED]>
This patch works for both with or without apic spinlock, but it has
differences
for two cases at AP bringup time, the apic spinlock overhead can be
observed.
For the non-apic-removal case, according to the semantics,
__apic_accept_irq
in apic_send_ipi should be protected by spinlock if the target apic is
not
the same with current apic, but this introduces lock taking order issue
and
possible deadlocks. Fortunately, not adding these spinlocks does not
seem to
cause additional problems, so this patch leaves them unchanged. This
little
`inconsistency' does not exist if we remove the whole apic spinlock
thing,
as suggested by Eddie Dong.
p.s. It's only the vmx part of the patch, svm side should be similar.
---
drivers/kvm/irq.c | 2 +-
drivers/kvm/irq.h | 1 +
drivers/kvm/kvm.h | 7 +++++++
drivers/kvm/kvm_main.c | 26 ++++++++++++++++++++------
drivers/kvm/lapic.c | 45
+++++++++++++++++++++++++++++++++++++--------
drivers/kvm/vmx.c | 24 +++++++++++++++++++++---
6 files changed, 87 insertions(+), 18 deletions(-)
diff --git a/drivers/kvm/irq.c b/drivers/kvm/irq.c
index e527853..8a59406 100644
--- a/drivers/kvm/irq.c
+++ b/drivers/kvm/irq.c
@@ -82,7 +82,7 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
wake_up_interruptible(&vcpu->wq);
++vcpu->stat.halt_wakeup;
}
- if (vcpu->guest_mode)
+ if (vcpu->guest_mode && ipi_pcpu != smp_processor_id())
smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu,
0, 0);
}
diff --git a/drivers/kvm/irq.h b/drivers/kvm/irq.h
index b6283d2..0034173 100644
--- a/drivers/kvm/irq.h
+++ b/drivers/kvm/irq.h
@@ -138,6 +138,7 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
int kvm_create_lapic(struct kvm_vcpu *vcpu);
+void kvm_lapic_reset(struct kvm_vcpu *vcpu);
void kvm_free_apic(struct kvm_lapic *apic);
u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index dbb929d..5e318b6 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -326,6 +326,13 @@ struct kvm_vcpu {
u64 shadow_efer;
u64 apic_base;
struct kvm_lapic *apic; /* kernel irqchip context */
+#define VCPU_MP_STATE_RUNNABLE 0
+#define VCPU_MP_STATE_UNINITIALIZED 1
+#define VCPU_MP_STATE_INIT_RECEIVED 2
+#define VCPU_MP_STATE_SIPI_RECEIVED 3
+#define VCPU_MP_STATE_HALTED 4
+ int mp_state;
+ int sipi_vector;
u64 ia32_misc_enable_msr;
struct kvm_mmu mmu;
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index a25dfc9..66ac71e 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -249,6 +249,10 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm
*kvm, unsigned id)
vcpu->mmu.root_hpa = INVALID_PAGE;
vcpu->kvm = kvm;
vcpu->vcpu_id = id;
+ if (!irqchip_in_kernel(kvm) || id == 0)
+ vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
+ else
+ vcpu->mp_state = VCPU_MP_STATE_UNINITIALIZED;
init_waitqueue_head(&vcpu->wq);
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
@@ -1371,7 +1375,7 @@ EXPORT_SYMBOL_GPL(emulate_instruction);
/*
* The vCPU has executed a HLT instruction with in-kernel mode enabled.
*/
-static void kvm_vcpu_kernel_halt(struct kvm_vcpu *vcpu)
+static void kvm_vcpu_block(struct kvm_vcpu *vcpu)
{
DECLARE_WAITQUEUE(wait, current);
@@ -1380,24 +1384,28 @@ static void kvm_vcpu_kernel_halt(struct kvm_vcpu
*vcpu)
/*
* We will block until either an interrupt or a signal wakes us
up
*/
- while(!(irqchip_in_kernel(vcpu->kvm) &&
kvm_cpu_has_interrupt(vcpu))
- && !vcpu->irq_summary
- && !signal_pending(current)) {
+ while (!kvm_cpu_has_interrupt(vcpu)
+ && !signal_pending(current)
+ && vcpu->mp_state != VCPU_MP_STATE_RUNNABLE
+ && vcpu->mp_state != VCPU_MP_STATE_SIPI_RECEIVED) {
set_current_state(TASK_INTERRUPTIBLE);
vcpu_put(vcpu);
schedule();
vcpu_load(vcpu);
}
+ __set_current_state(TASK_RUNNING);
remove_wait_queue(&vcpu->wq, &wait);
- set_current_state(TASK_RUNNING);
}
int kvm_emulate_halt(struct kvm_vcpu *vcpu)
{
++vcpu->stat.halt_exits;
if (irqchip_in_kernel(vcpu->kvm)) {
- kvm_vcpu_kernel_halt(vcpu);
+ vcpu->mp_state = VCPU_MP_STATE_HALTED;
+ kvm_vcpu_block(vcpu);
+ if (vcpu->mp_state != VCPU_MP_STATE_RUNNABLE)
+ return -EINTR;
return 1;
} else {
vcpu->run->exit_reason = KVM_EXIT_HLT;
@@ -2001,6 +2009,12 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu
*vcpu, struct kvm_run *kvm_run)
vcpu_load(vcpu);
+ if (unlikely(vcpu->mp_state == VCPU_MP_STATE_UNINITIALIZED)) {
+ kvm_vcpu_block(vcpu);
+ vcpu_put(vcpu);
+ return -EAGAIN;
+ }
+
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
diff --git a/drivers/kvm/lapic.c b/drivers/kvm/lapic.c
index b28f326..f68b025 100644
--- a/drivers/kvm/lapic.c
+++ b/drivers/kvm/lapic.c
@@ -313,8 +313,8 @@ static int apic_match_dest(struct kvm_vcpu *vcpu,
struct kvm_lapic *source,
static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
int vector, int level, int trig_mode)
{
- int result = 0;
- int orig_irr;
+ int orig_irr, result = 0;
+ struct kvm_vcpu *vcpu = apic->vcpu;
switch (delivery_mode) {
case APIC_DM_FIXED:
@@ -336,8 +336,14 @@ static int __apic_accept_irq(struct kvm_lapic
*apic, int delivery_mode,
} else
apic_clear_vector(vector, apic->regs +
APIC_TMR);
- kvm_vcpu_kick(apic->vcpu);
-
+ if (vcpu->mp_state == VCPU_MP_STATE_RUNNABLE)
+ kvm_vcpu_kick(vcpu);
+ else if (vcpu->mp_state == VCPU_MP_STATE_HALTED) {
+ vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
+ if (waitqueue_active(&vcpu->wq))
+ wake_up_interruptible(&vcpu->wq);
+ }
+
result = (orig_irr == 0);
break;
@@ -353,11 +359,30 @@ static int __apic_accept_irq(struct kvm_lapic
*apic, int delivery_mode,
break;
case APIC_DM_INIT:
- printk(KERN_DEBUG "Ignoring guest INIT\n");
+ if (level) {
+ if (vcpu->mp_state == VCPU_MP_STATE_RUNNABLE)
+ printk(KERN_DEBUG
+ "INIT on a runnable vcpu %d\n",
+ vcpu->vcpu_id);
+ vcpu->mp_state = VCPU_MP_STATE_INIT_RECEIVED;
+ kvm_vcpu_kick(vcpu);
+ } else {
+ printk(KERN_DEBUG
+ "Ignoring de-assert INIT to vcpu %d\n",
+ vcpu->vcpu_id);
+ }
+
break;
case APIC_DM_STARTUP:
- printk(KERN_DEBUG "Ignoring guest STARTUP\n");
+ printk(KERN_DEBUG "SIPI to vcpu %d vector 0x%02x\n",
+ vcpu->vcpu_id, vector);
+ if (vcpu->mp_state == VCPU_MP_STATE_INIT_RECEIVED) {
+ vcpu->sipi_vector = vector;
+ vcpu->mp_state = VCPU_MP_STATE_SIPI_RECEIVED;
+ if (waitqueue_active(&vcpu->wq))
+ wake_up_interruptible(&vcpu->wq);
+ }
break;
default:
@@ -812,7 +837,7 @@ u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_lapic_get_base);
-static void lapic_reset(struct kvm_vcpu *vcpu)
+void kvm_lapic_reset(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic;
int i;
@@ -861,6 +886,7 @@ static void lapic_reset(struct kvm_vcpu *vcpu)
vcpu, GET_APIC_ID(apic_get_reg(apic, APIC_ID)),
vcpu->apic_base, apic->base_address);
}
+EXPORT_SYMBOL_GPL(kvm_lapic_reset);
int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
{
@@ -900,7 +926,10 @@ static int __apic_timer_fn(struct kvm_lapic *apic)
atomic_inc(&apic->timer.pending);
if (waitqueue_active(q))
+ {
+ apic->vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
wake_up_interruptible(q);
+ }
if (apic_lvtt_period(apic)) {
result = 1;
apic->timer.dev.expires = ktime_add_ns(
@@ -964,7 +993,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
apic->base_address = APIC_DEFAULT_PHYS_BASE;
vcpu->apic_base = APIC_DEFAULT_PHYS_BASE;
- lapic_reset(vcpu);
+ kvm_lapic_reset(vcpu);
apic->dev.read = apic_mmio_read;
apic->dev.write = apic_mmio_write;
apic->dev.in_range = apic_mmio_range;
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 8a503c2..f6f1a06 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -1404,6 +1404,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
goto out;
}
+ vmx->vcpu.rmode.active = 0;
+
vmx->vcpu.regs[VCPU_REGS_RDX] = get_rdx_init_val();
set_cr8(&vmx->vcpu, 0);
msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
@@ -1417,8 +1419,13 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
* GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode
* insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4.
Sigh.
*/
- vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
- vmcs_writel(GUEST_CS_BASE, 0x000f0000);
+ if (vmx->vcpu.vcpu_id == 0) {
+ vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
+ vmcs_writel(GUEST_CS_BASE, 0x000f0000);
+ } else {
+ vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.sipi_vector <<
8);
+ vmcs_writel(GUEST_CS_BASE, vmx->vcpu.sipi_vector << 12);
+ }
vmcs_write32(GUEST_CS_LIMIT, 0xffff);
vmcs_write32(GUEST_CS_AR_BYTES, 0x9b);
@@ -1443,7 +1450,10 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_writel(GUEST_SYSENTER_EIP, 0);
vmcs_writel(GUEST_RFLAGS, 0x02);
- vmcs_writel(GUEST_RIP, 0xfff0);
+ if (vmx->vcpu.vcpu_id == 0)
+ vmcs_writel(GUEST_RIP, 0xfff0);
+ else
+ vmcs_writel(GUEST_RIP, 0);
vmcs_writel(GUEST_RSP, 0);
//todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0
@@ -2193,6 +2203,14 @@ static int vmx_vcpu_run(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
u8 fail;
int r;
+ if (unlikely(vcpu->mp_state == VCPU_MP_STATE_SIPI_RECEIVED)) {
+ printk("vcpu %d received sipi with vector # %x\n",
+ vcpu->vcpu_id, vcpu->sipi_vector);
+ kvm_lapic_reset(vcpu);
+ vmx_vcpu_setup(vmx);
+ vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
+ }
+
preempted:
if (vcpu->guest_debug.enabled)
kvm_guest_debug_pre(vcpu);
0006-kvm-init-sipi-kernapic.patch
Description: 0006-kvm-init-sipi-kernapic.patch
------------------------------------------------------------------------- This SF.net email is sponsored by: Splunk Inc. Still grepping through log files to find problems? Stop. Now Search log events and configuration files using AJAX and a browser. Download your FREE copy of Splunk now >> http://get.splunk.com/
_______________________________________________ kvm-devel mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/kvm-devel
