commit 58df4587880e541057386abf2aea7b63bd7d9d97
Author: root <root@vt32-pae.(none)>
Date:   Wed Jun 20 15:31:31 2007 +0800

        In kernel PIC support.
        Mixed mode of irqchip has a hole in
        injecting device irq which may be not
        immediately processed and thus difficult
        to handle.
        Move PIC to kernel together with APIC can
        both solve the issue and get performance
        gain.
    
    Signed-off-by: Yaozu (Eddie) Dong <eddie.dong@intel.com>

diff --git a/drivers/kvm/Makefile b/drivers/kvm/Makefile
index c0a789f..118c79b 100644
--- a/drivers/kvm/Makefile
+++ b/drivers/kvm/Makefile
@@ -2,7 +2,7 @@
 # Makefile for Kernel-based Virtual Machine module
 #
 
-kvm-objs := kvm_main.o mmu.o x86_emulate.o
+kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o virq.o
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/drivers/kvm/i8259.c b/drivers/kvm/i8259.c
new file mode 100644
index 0000000..9ba8c1e
--- /dev/null
+++ b/drivers/kvm/i8259.c
@@ -0,0 +1,435 @@
+/*
+ * QEMU 8259 interrupt controller emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ * Copyright (c) 2007 Intel Corperation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ * Authors:
+ *   Yaozu (Eddie) Dong <Eddie.dong@intel.com>
+ *   Port from Qemu.
+ */
+#include "virq.h"
+#include <linux/mm.h>
+
+/* set irq level. If an edge is detected, then the IRR is set to 1 */
+static inline void pic_set_irq1(struct pic_state *s, int irq, int level)
+{
+	int mask;
+	mask = 1 << irq;
+	if (s->elcr & mask) {
+		/* level triggered */
+		if (level) {
+			s->irr |= mask;
+			s->last_irr |= mask;
+		} else {
+			s->irr &= ~mask;
+			s->last_irr &= ~mask;
+		}
+	} else {
+		/* edge triggered */
+		if (level) {
+			if ((s->last_irr & mask) == 0)
+				s->irr |= mask;
+			s->last_irr |= mask;
+		} else {
+			s->last_irr &= ~mask;
+		}
+	}
+}
+
+/* return the highest priority found in mask (highest = smallest
+   number). Return 8 if no irq */
+static inline int get_priority(struct pic_state *s, int mask)
+{
+	int priority;
+	if (mask == 0)
+		return 8;
+	priority = 0;
+	while ((mask & (1 << ((priority + s->priority_add) & 7))) == 0)
+		priority++;
+	return priority;
+}
+
+/* return the pic wanted interrupt. return -1 if none */
+static int pic_get_irq(struct pic_state *s)
+{
+	int mask, cur_priority, priority;
+
+	mask = s->irr & ~s->imr;
+	priority = get_priority(s, mask);
+	if (priority == 8)
+		return -1;
+	/* compute current priority. If special fully nested mode on the
+	   master, the IRQ coming from the slave is not taken into account
+	   for the priority computation. */
+	mask = s->isr;
+	if (s->special_fully_nested_mode && s == &s->pics_state->pics[0])
+		mask &= ~(1 << 2);
+	cur_priority = get_priority(s, mask);
+	if (priority < cur_priority) {
+		/* higher priority found: an irq should be generated */
+		return (priority + s->priority_add) & 7;
+	} else {
+		return -1;
+	}
+}
+
+/* raise irq to CPU if necessary. must be called every time the active
+   irq may change */
+static void pic_update_irq(struct pic_state2 *s)
+{
+	int irq2, irq;
+
+	/* first look at slave pic */
+	irq2 = pic_get_irq(&s->pics[1]);
+	if (irq2 >= 0) {
+		/* if irq request by slave pic, signal master PIC */
+		pic_set_irq1(&s->pics[0], 2, 1);
+		pic_set_irq1(&s->pics[0], 2, 0);
+	}
+	/* look at requested irq */
+	irq = pic_get_irq(&s->pics[0]);
+	if (irq >= 0) {
+		s->irq_request(s->irq_request_opaque, 1);
+	}
+}
+
+void pic_set_irq_new(void *opaque, int irq, int level)
+{
+	struct pic_state2 *s = opaque;
+
+	pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
+	/* used for IOAPIC irqs */
+	if (s->alt_irq_func)
+		s->alt_irq_func(s->alt_irq_opaque, irq, level);
+	pic_update_irq(s);
+}
+
+/* acknowledge interrupt 'irq' */
+static inline void pic_intack(struct pic_state *s, int irq)
+{
+	if (s->auto_eoi) {
+		if (s->rotate_on_auto_eoi)
+			s->priority_add = (irq + 1) & 7;
+	} else {
+		s->isr |= (1 << irq);
+	}
+	/* We don't clear a level sensitive interrupt here */
+	if (!(s->elcr & (1 << irq)))
+		s->irr &= ~(1 << irq);
+}
+
+int pic_read_irq(struct pic_state2 *s)
+{
+	int irq, irq2, intno;
+
+	irq = pic_get_irq(&s->pics[0]);
+	if (irq >= 0) {
+		pic_intack(&s->pics[0], irq);
+		if (irq == 2) {
+			irq2 = pic_get_irq(&s->pics[1]);
+			if (irq2 >= 0) {
+				pic_intack(&s->pics[1], irq2);
+			} else {
+				/* spurious IRQ on slave controller */
+				irq2 = 7;
+			}
+			intno = s->pics[1].irq_base + irq2;
+			irq = irq2 + 8;
+		} else {
+			intno = s->pics[0].irq_base + irq;
+		}
+	} else {
+		/* spurious IRQ on host controller */
+		irq = 7;
+		intno = s->pics[0].irq_base + irq;
+	}
+	pic_update_irq(s);
+
+	return intno;
+}
+
+static void pic_reset(void *opaque)
+{
+	struct pic_state *s = opaque;
+
+	s->last_irr = 0;
+	s->irr = 0;
+	s->imr = 0;
+	s->isr = 0;
+	s->priority_add = 0;
+	s->irq_base = 0;
+	s->read_reg_select = 0;
+	s->poll = 0;
+	s->special_mask = 0;
+	s->init_state = 0;
+	s->auto_eoi = 0;
+	s->rotate_on_auto_eoi = 0;
+	s->special_fully_nested_mode = 0;
+	s->init4 = 0;
+	/* Note: ELCR is not reset */
+}
+
+static void pic_ioport_write(void *opaque, uint32_t addr, uint32_t val)
+{
+	struct pic_state *s = opaque;
+	int priority, cmd, irq;
+
+	addr &= 1;
+	if (addr == 0) {
+		if (val & 0x10) {
+			/* init */
+			pic_reset(s);
+			/* deassert a pending interrupt */
+			s->pics_state->irq_request(s->pics_state->
+						   irq_request_opaque, 0);
+			s->init_state = 1;
+			s->init4 = val & 1;
+			if (val & 0x02)
+				printk(KERN_ERR "single mode not supported");
+			if (val & 0x08)
+				printk(KERN_ERR
+				       "level sensitive irq not supported");
+		} else if (val & 0x08) {
+			if (val & 0x04)
+				s->poll = 1;
+			if (val & 0x02)
+				s->read_reg_select = val & 1;
+			if (val & 0x40)
+				s->special_mask = (val >> 5) & 1;
+		} else {
+			cmd = val >> 5;
+			switch (cmd) {
+			case 0:
+			case 4:
+				s->rotate_on_auto_eoi = cmd >> 2;
+				break;
+			case 1:	/* end of interrupt */
+			case 5:
+				priority = get_priority(s, s->isr);
+				if (priority != 8) {
+					irq = (priority + s->priority_add) & 7;
+					s->isr &= ~(1 << irq);
+					if (cmd == 5)
+						s->priority_add = (irq + 1) & 7;
+					pic_update_irq(s->pics_state);
+				}
+				break;
+			case 3:
+				irq = val & 7;
+				s->isr &= ~(1 << irq);
+				pic_update_irq(s->pics_state);
+				break;
+			case 6:
+				s->priority_add = (val + 1) & 7;
+				pic_update_irq(s->pics_state);
+				break;
+			case 7:
+				irq = val & 7;
+				s->isr &= ~(1 << irq);
+				s->priority_add = (irq + 1) & 7;
+				pic_update_irq(s->pics_state);
+				break;
+			default:
+				/* no operation */
+				break;
+			}
+		}
+	} else {
+		switch (s->init_state) {
+		case 0:
+			/* normal mode */
+			s->imr = val;
+			pic_update_irq(s->pics_state);
+			break;
+		case 1:
+			s->irq_base = val & 0xf8;
+			s->init_state = 2;
+			break;
+		case 2:
+			if (s->init4) {
+				s->init_state = 3;
+			} else {
+				s->init_state = 0;
+			}
+			break;
+		case 3:
+			s->special_fully_nested_mode = (val >> 4) & 1;
+			s->auto_eoi = (val >> 1) & 1;
+			s->init_state = 0;
+			break;
+		}
+	}
+}
+
+static uint32_t pic_poll_read(struct pic_state *s, uint32_t addr1)
+{
+	int ret;
+
+	ret = pic_get_irq(s);
+	if (ret >= 0) {
+		if (addr1 >> 7) {
+			s->pics_state->pics[0].isr &= ~(1 << 2);
+			s->pics_state->pics[0].irr &= ~(1 << 2);
+		}
+		s->irr &= ~(1 << ret);
+		s->isr &= ~(1 << ret);
+		if (addr1 >> 7 || ret != 2)
+			pic_update_irq(s->pics_state);
+	} else {
+		ret = 0x07;
+		pic_update_irq(s->pics_state);
+	}
+
+	return ret;
+}
+
+static uint32_t pic_ioport_read(void *opaque, uint32_t addr1)
+{
+	struct pic_state *s = opaque;
+	unsigned int addr;
+	int ret;
+
+	addr = addr1;
+	addr &= 1;
+	if (s->poll) {
+		ret = pic_poll_read(s, addr1);
+		s->poll = 0;
+	} else {
+		if (addr == 0) {
+			if (s->read_reg_select)
+				ret = s->isr;
+			else
+				ret = s->irr;
+		} else {
+			ret = s->imr;
+		}
+	}
+	return ret;
+}
+
+static void elcr_ioport_write(void *opaque, uint32_t addr, uint32_t val)
+{
+	struct pic_state *s = opaque;
+	s->elcr = val & s->elcr_mask;
+}
+
+static uint32_t elcr_ioport_read(void *opaque, uint32_t addr1)
+{
+	struct pic_state *s = opaque;
+	return s->elcr;
+}
+
+static int picdev_in_range(struct kvm_io_device *this, gpa_t addr)
+{
+	switch (addr) {
+		/* Master PIC */
+	case 0x20:
+	case 0x21:
+		/* Slave PIC */
+	case 0xa0:
+	case 0xa1:
+		/* elcr0  & 1 */
+	case 0x4d0:
+	case 0x4d1:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+static void picdev_write(struct kvm_io_device *this,
+			 gpa_t addr, int len, const void *val)
+{
+	struct pic_state2 *s = this->private;
+	unsigned char data = *(unsigned char *)val;
+
+	if (len != 1) {
+		printk(KERN_ERR "PIC: non byte write\n");
+		return;
+	}
+	switch (addr) {
+	case 0x20:
+	case 0x21:
+	case 0xa0:
+	case 0xa1:
+		pic_ioport_write(&s->pics[addr >> 7], addr, data);
+		break;
+	case 0x4d0:
+	case 0x4d1:
+		elcr_ioport_write(&s->pics[addr & 1], addr, data);
+		break;
+	}
+}
+
+static void picdev_read(struct kvm_io_device *this,
+			gpa_t addr, int len, void *val)
+{
+	struct pic_state2 *s = this->private;
+	unsigned char data = 0;
+
+	if (len != 1) {
+		printk(KERN_ERR "PIC: non byte read\n");
+		return;
+	}
+	switch (addr) {
+	case 0x20:
+	case 0x21:
+	case 0xa0:
+	case 0xa1:
+		data = pic_ioport_read(&s->pics[addr >> 7], addr);
+		break;
+	case 0x4d0:
+	case 0x4d1:
+		data = elcr_ioport_read(&s->pics[addr & 1], addr);
+		break;
+	}
+	*(unsigned char *)val = data;
+}
+
+/* callback when PIC0 irq status changed */
+static void pic_irq_request(void *opaque, int level)
+{
+	struct kvm *kvm = opaque;
+
+	int_output(kvm->vpic) = level;
+}
+
+struct pic_state2 *create_pic(struct kvm *kvm)
+{
+	struct pic_state2 *s;
+	s = kzalloc(sizeof(struct pic_state2), GFP_KERNEL);
+	if (!s)
+		return NULL;
+	s->pics[0].elcr_mask = 0xf8;
+	s->pics[1].elcr_mask = 0xde;
+	s->irq_request = pic_irq_request;
+	s->irq_request_opaque = kvm;
+	s->pics[0].pics_state = s;
+	s->pics[1].pics_state = s;
+
+	/* Initialize PIO device */
+	s->dev.read = picdev_read;
+	s->dev.write = picdev_write;
+	s->dev.in_range = picdev_in_range;
+	s->dev.private = s;
+	kvm_io_bus_register_dev(&kvm->pio_bus, &s->dev);
+	return s;
+}
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index a7c5e6b..1e2d3ba 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -456,8 +456,15 @@ struct kvm {
 	struct file *filp;
 	struct kvm_io_bus mmio_bus;
 	struct kvm_io_bus pio_bus;
+	void *vpic;
 };
 
+#define kernel_pic(kvm) ((kvm)->vpic)
+static inline int irqchip_in_kernel(struct kvm *kvm)
+{
+	return (kernel_pic(kvm) != 0);
+}
+
 struct descriptor_table {
 	u16 limit;
 	unsigned long base;
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 7826f16..25f99fc 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -16,6 +16,7 @@
  */
 
 #include "kvm.h"
+#include "virq.h"
 
 #include <linux/kvm.h>
 #include <linux/module.h>
@@ -476,6 +477,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	spin_lock(&kvm_lock);
 	list_del(&kvm->vm_list);
 	spin_unlock(&kvm_lock);
+	if (kernel_pic(kvm))
+		kfree(kernel_pic(kvm));
 	kvm_io_bus_destroy(&kvm->pio_bus);
 	kvm_io_bus_destroy(&kvm->mmio_bus);
 	kvm_free_vcpus(kvm);
@@ -1372,7 +1375,8 @@ EXPORT_SYMBOL_GPL(emulate_instruction);
 
 int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 {
-	if (vcpu->irq_summary)
+	if (vcpu->irq_summary ||
+           (irqchip_in_kernel(vcpu->kvm) && cpu_has_interrupt(vcpu)))
 		return 1;
 
 	vcpu->run->exit_reason = KVM_EXIT_HLT;
@@ -2855,6 +2859,26 @@ static long kvm_vm_ioctl(struct file *filp,
 			goto out;
 		break;
 	}
+	case KVM_CREATE_PIC:
+		r = -ENOMEM;
+		kernel_pic(kvm) = create_pic(kvm);
+		if (kernel_pic(kvm)) {
+			r = 0;
+		}
+		break;
+	case KVM_SET_ISA_IRQ_LEVEL: {
+		struct kvm_irq_level irq_event;
+		r = -EFAULT;
+		if (copy_from_user(&irq_event, argp, sizeof irq_event))
+			goto out;
+		if (irqchip_in_kernel(kvm)) {
+			pic_set_irq_new(kernel_pic(kvm),
+					irq_event.irq,
+					irq_event.level);
+			r = 0;
+		}
+		break;
+	}
 	default:
 		;
 	}
diff --git a/drivers/kvm/virq.c b/drivers/kvm/virq.c
new file mode 100644
index 0000000..a807296
--- /dev/null
+++ b/drivers/kvm/virq.c
@@ -0,0 +1,58 @@
+/*
+ * virq.c: API for in kernel interrupt controller
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ * Authors:
+ *   Yaozu (Eddie) Dong <Eddie.dong@intel.com>
+ *
+ */
+
+#include "kvm.h"
+#include "virq.h"
+
+/*
+ * check if there is pending interrupt without
+ * intack.
+ */
+int cpu_has_interrupt(struct kvm_vcpu *v)
+{
+	struct pic_state2 *s = kernel_pic(v->kvm);
+
+	/* PIC */
+	if (int_output(s))
+		return 1;
+	/* TODO: APIC */
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(cpu_has_interrupt);
+
+/*
+ * Read pending interrupt vector and intack.
+ */
+int cpu_get_interrupt(struct kvm_vcpu *v)
+{
+	struct pic_state2 *s = kernel_pic(v->kvm);
+	int vector;
+
+	int_output(s) = 0;
+	vector = pic_read_irq(s);
+	if (vector != -1)
+		return vector;
+	/* TODO: APIC */
+	return -1;
+}
+
+EXPORT_SYMBOL_GPL(cpu_get_interrupt);
diff --git a/drivers/kvm/virq.h b/drivers/kvm/virq.h
new file mode 100644
index 0000000..ed467dc
--- /dev/null
+++ b/drivers/kvm/virq.h
@@ -0,0 +1,70 @@
+/*
+ * virq.h: in kernel interrupt controller related definitions
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ * Authors:
+ *   Yaozu (Eddie) Dong <Eddie.dong@intel.com>
+ *
+ */
+
+#ifndef __VIRQ_H
+#define __VIRQ_H
+
+#include "kvm.h"
+
+typedef void SetIRQFunc(void *opaque, int irq_num, int level);
+typedef void IRQRequestFunc(void *opaque, int level);
+
+struct PicState2;
+struct pic_state {
+	uint8_t last_irr;	/* edge detection */
+	uint8_t irr;		/* interrupt request register */
+	uint8_t imr;		/* interrupt mask register */
+	uint8_t isr;		/* interrupt service register */
+	uint8_t priority_add;	/* highest irq priority */
+	uint8_t irq_base;
+	uint8_t read_reg_select;
+	uint8_t poll;
+	uint8_t special_mask;
+	uint8_t init_state;
+	uint8_t auto_eoi;
+	uint8_t rotate_on_auto_eoi;
+	uint8_t special_fully_nested_mode;
+	uint8_t init4;		/* true if 4 byte init */
+	uint8_t elcr;		/* PIIX edge/trigger selection */
+	uint8_t elcr_mask;
+	struct pic_state2 *pics_state;
+};
+
+struct pic_state2 {
+	/* 0 is master pic, 1 is slave pic */
+	struct pic_state pics[2];
+	IRQRequestFunc *irq_request;
+	void *irq_request_opaque;
+	/* IOAPIC callback support */
+	SetIRQFunc *alt_irq_func;
+	void *alt_irq_opaque;
+	int output;		/* intr from master PIC */
+	struct kvm_io_device dev;
+};
+
+#define int_output(s)		(((struct pic_state2 *)s)->output)
+struct pic_state2 *create_pic(struct kvm *kvm);
+void pic_set_irq_new(void *opaque, int irq, int level);
+int pic_read_irq(struct pic_state2 *s);
+int cpu_get_interrupt(struct kvm_vcpu *v);
+int cpu_has_interrupt(struct kvm_vcpu *v);
+
+#endif
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index b47ddcc..cef0d64 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -16,6 +16,7 @@
  */
 
 #include "kvm.h"
+#include "virq.h"
 #include "vmx.h"
 #include <linux/module.h>
 #include <linux/kernel.h>
@@ -1464,6 +1465,16 @@ static void inject_rmode_irq(struct kvm_vcpu *vcpu, int irq)
 	vmcs_writel(GUEST_RSP, (vmcs_readl(GUEST_RSP) & ~0xffff) | (sp - 6));
 }
 
+static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
+{
+	if (vcpu->rmode.active) {
+		inject_rmode_irq(vcpu, irq);
+		return;
+	}
+	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+			irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
+}
+
 static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
 {
 	int word_index = __ffs(vcpu->irq_summary);
@@ -1473,13 +1484,7 @@ static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
 	clear_bit(bit_index, &vcpu->irq_pending[word_index]);
 	if (!vcpu->irq_pending[word_index])
 		clear_bit(word_index, &vcpu->irq_summary);
-
-	if (vcpu->rmode.active) {
-		inject_rmode_irq(vcpu, irq);
-		return;
-	}
-	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
-			irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
+	vmx_inject_irq(vcpu, irq);
 }
 
 
@@ -1563,7 +1568,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		       "intr info 0x%x\n", __FUNCTION__, vect_info, intr_info);
 	}
 
-	if (is_external_interrupt(vect_info)) {
+	if (!irqchip_in_kernel(vcpu->kvm) && is_external_interrupt(vect_info)) {
 		int irq = vect_info & VECTORING_INFO_VECTOR_MASK;
 		set_bit(irq, vcpu->irq_pending);
 		set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary);
@@ -1894,6 +1899,12 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
 static int handle_interrupt_window(struct kvm_vcpu *vcpu,
 				   struct kvm_run *kvm_run)
 {
+	u32 cpu_based_vm_exec_control;
+
+	/* clear pending irq */
+	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+	cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
+	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
 	/*
 	 * If the user space waits to inject interrupts, exit as soon as
 	 * possible
@@ -1986,6 +1997,56 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
 	vmcs_writel(GUEST_CR3, vmcs_readl(GUEST_CR3));
 }
 
+void enable_irq_window(struct kvm_vcpu *vcpu)
+{
+	u32 cpu_based_vm_exec_control;
+
+	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
+	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
+
+void do_intr_assist(struct kvm_vcpu *vcpu)
+{
+	u32 idtv_info_field, intr_info_field;
+	int has_ext_irq, interrupt_window_open;
+	/* TODO: Move IDT_Vectoring here */
+
+	has_ext_irq = cpu_has_interrupt(vcpu);
+	intr_info_field = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);
+	idtv_info_field = vmcs_read32(IDT_VECTORING_INFO_FIELD);
+	if (intr_info_field & INTR_INFO_VALID_MASK) {
+		if (idtv_info_field & INTR_INFO_VALID_MASK) {
+			/* TODO: fault when IDT_Vectoring */
+			printk(KERN_ERR "Fault when IDT_Vectoring\n");
+		}
+		if (has_ext_irq)
+			enable_irq_window(vcpu);
+		return;
+	}
+	if (unlikely(idtv_info_field & INTR_INFO_VALID_MASK)) {
+		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
+		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
+				vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
+
+		if ( unlikely(idtv_info_field & 0x800) ) /* valid error code */
+			vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
+				vmcs_read32(IDT_VECTORING_ERROR_CODE));
+		if ( unlikely(has_ext_irq) )
+			enable_irq_window(vcpu);
+		return;
+	}
+	if (!has_ext_irq)
+		return;
+	interrupt_window_open =
+		((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
+		 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
+	if (interrupt_window_open)
+		vmx_inject_irq(vcpu, cpu_get_interrupt(vcpu));
+	else
+		enable_irq_window(vcpu);
+}
+
 static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	u8 fail;
@@ -1996,9 +2057,6 @@ preempted:
 		kvm_guest_debug_pre(vcpu);
 
 again:
-	if (!vcpu->mmio_read_completed)
-		do_interrupt_requests(vcpu, kvm_run);
-
 	vmx_save_host_state(vcpu);
 	kvm_load_guest_fpu(vcpu);
 
@@ -2012,6 +2070,10 @@ again:
 	vmcs_writel(HOST_CR0, read_cr0());
 
 	local_irq_disable();
+	if (irqchip_in_kernel(vcpu->kvm))
+		do_intr_assist(vcpu);
+	else if (!vcpu->mmio_read_completed)
+		do_interrupt_requests(vcpu, kvm_run);
 
 	vcpu->guest_mode = 1;
 	if (vcpu->requests)
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index e6edca8..fbcfc86 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -41,6 +41,12 @@ struct kvm_memory_alias {
 	__u64 target_phys_addr;
 };
 
+/* for KVM_SET_IRQ_LEVEL */
+struct kvm_irq_level {
+	__u32 irq;
+	__u32 level;
+};
+
 enum kvm_exit_reason {
 	KVM_EXIT_UNKNOWN          = 0,
 	KVM_EXIT_EXCEPTION        = 1,
@@ -282,6 +288,9 @@ struct kvm_signal_mask {
 #define KVM_CREATE_VCPU           _IO(KVMIO,  0x41)
 #define KVM_GET_DIRTY_LOG         _IOW(KVMIO, 0x42, struct kvm_dirty_log)
 #define KVM_SET_MEMORY_ALIAS      _IOW(KVMIO, 0x43, struct kvm_memory_alias)
+/* Device model IOC */
+#define KVM_CREATE_PIC		  _IO(KVMIO,  0x60)
+#define KVM_SET_ISA_IRQ_LEVEL	  _IO(KVMIO,  0x61)
 
 /*
  * ioctls for vcpu fds
