Add in-kernel PIC support.

Signed-off-by: Yaozu (Eddie) Dong <eddie.dong@intel.com>

diff --git a/kernel/Kbuild b/kernel/Kbuild
index e9bcda7..ebb55cd 100644
--- a/kernel/Kbuild
+++ b/kernel/Kbuild
@@ -1,5 +1,5 @@
 EXTRA_CFLAGS := -I$(src)/include -include $(src)/external-module-compat.h
 obj-m := kvm.o kvm-intel.o kvm-amd.o
-kvm-objs := kvm_main.o mmu.o x86_emulate.o
+kvm-objs := kvm_main.o mmu.o x86_emulate.o irq.o i8259.o
 kvm-intel-objs := vmx.o vmx-debug.o
 kvm-amd-objs := svm.o
diff --git a/qemu/cpu-defs.h b/qemu/cpu-defs.h
index 0b49c89..67ba41e 100644
--- a/qemu/cpu-defs.h
+++ b/qemu/cpu-defs.h
@@ -75,6 +75,7 @@ typedef unsigned long ram_addr_t;
 #define EXCP_HLT        0x10001 /* hlt instruction reached */
 #define EXCP_DEBUG      0x10002 /* cpu stopped after a breakpoint or singlestep */
 #define EXCP_HALTED     0x10003 /* cpu is halted (waiting for external event) */
+#define EXCP_IO_WINDOW  0x10004 /* IO Window for user irq injection */
 #define MAX_BREAKPOINTS 32
 
 #define TB_JMP_CACHE_BITS 12
diff --git a/qemu/hw/i8259.c b/qemu/hw/i8259.c
index a2a8187..117340c 100644
--- a/qemu/hw/i8259.c
+++ b/qemu/hw/i8259.c
@@ -177,7 +177,12 @@ int64_t irq_time[16];
 void pic_set_irq_new(void *opaque, int irq, int level)
 {
     PicState2 *s = opaque;
+#ifdef USE_KVM
+    extern int kvm_set_irq(int irq, int level);
 
+    if (kvm_set_irq(irq, level))
+        return;
+#endif
 #if defined(DEBUG_PIC) || defined(DEBUG_IRQ_COUNT)
     if (level != irq_level[irq]) {
 #if defined(DEBUG_PIC)
diff --git a/qemu/qemu-kvm.c b/qemu/qemu-kvm.c
index 1849997..811b320 100644
--- a/qemu/qemu-kvm.c
+++ b/qemu/qemu-kvm.c
@@ -601,6 +601,7 @@ static int kvm_writeq(void *opaque, uint64_t addr, uint64_t data)
 
 static int kvm_io_window(void *opaque)
 { /* Records EXCP_IO_WINDOW as the exception index and returns 1 unconditionally; opaque is unused. */
+    env->exception_index = EXCP_IO_WINDOW; /* NOTE(review): env is not declared in this function as shown - confirm it resolves at file scope */
     return 1;
 }
 
@@ -612,7 +613,8 @@ static int kvm_halt(void *opaque, int vcpu)
     env = envs[0];
     if (!((env->interrupt_request & CPU_INTERRUPT_HARD) &&
 	  (env->eflags & IF_MASK))) {
-	    env->hflags |= HF_HALTED_MASK;
+	    /* TODO: for halt emulation; temporary workaround for now */
+	    /* env->hflags |= HF_HALTED_MASK; */
 	    env->exception_index = EXCP_HLT;
     }
 
@@ -923,4 +925,9 @@ int kvm_get_phys_ram_page_bitmap(unsigned char *bitmap)
  out:
     return r;
 }
+
+int kvm_set_irq(int irq, int level)
+{ /* Forwards irq and level to kvm_set_irq_level() with kvm_context and returns its result unchanged. */
+    return kvm_set_irq_level(kvm_context, irq, level);
+}
 #endif
diff --git a/user/kvmctl.c b/user/kvmctl.c
index 054547c..b9c9f2b 100644
--- a/user/kvmctl.c
+++ b/user/kvmctl.c
@@ -66,6 +66,7 @@ struct kvm_context {
 	int dirty_pages_log_all;
 	/// memory regions parameters
 	struct kvm_memory_region mem_regions[KVM_MAX_NUM_MEM_REGIONS];
+	int irqchip_in_kernel;
 };
 
 /*
@@ -288,6 +289,15 @@ int kvm_create(kvm_context_t kvm, unsigned long memory, void **vm_mem)
 	     MAP_PRIVATE|MAP_FIXED, zfd, 0);
 	close(zfd);
 
+	kvm->irqchip_in_kernel = 0;
+	r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_PIC);
+	if (r > 0) {	/* kernel irqchip supported */
+		r = ioctl(fd, KVM_CREATE_PIC);
+		if (r >= 0)
+			kvm->irqchip_in_kernel = 1;
+		else
+			printf("Create kernel PIC irqchip failed\n");
+	}
 	r = kvm_create_vcpu(kvm, 0);
 	if (r < 0)
 		return r;
@@ -407,6 +417,21 @@ int kvm_get_mem_map(kvm_context_t kvm, int slot, void *buf)
 #endif /* KVM_GET_MEM_MAP */
 }
 
+int kvm_set_irq_level(kvm_context_t kvm, int irq, int level)
+{ /* Issues KVM_IRQ_LINE on kvm->vm_fd for (irq, level). Returns 0 if kvm->irqchip_in_kernel is unset, otherwise 1. */
+	struct kvm_irq_level event;
+	int r;
+
+	if (!kvm->irqchip_in_kernel)
+		return 0; /* no in-kernel irqchip: no ioctl is issued */
+	event.level = level;
+	event.gsi = irq; /* the irq argument is passed as the gsi field */
+	r = ioctl(kvm->vm_fd, KVM_IRQ_LINE, &event);
+	if (r == -1)
+		perror("kvm_set_irq_level"); /* failure is only reported; the return value below is still 1 */
+	return 1;
+}
+
 static int handle_io_abi10(kvm_context_t kvm, struct kvm_run_abi10 *run,
 			   int vcpu)
 {
@@ -944,7 +969,8 @@ int kvm_run(kvm_context_t kvm, int vcpu)
 		return kvm_run_abi10(kvm, vcpu);
 
 again:
-	run->request_interrupt_window = try_push_interrupts(kvm);
+	if (!kvm->irqchip_in_kernel)
+		run->request_interrupt_window = try_push_interrupts(kvm);
 	pre_kvm_run(kvm, vcpu);
 	r = ioctl(fd, KVM_RUN, 0);
 	post_kvm_run(kvm, vcpu);
@@ -1022,6 +1048,25 @@ int kvm_guest_debug(kvm_context_t kvm, int vcpu, struct kvm_debug_guest *dbg)
 	return ioctl(kvm->vcpu_fd[vcpu], KVM_DEBUG_GUEST, dbg);
 }
 
+static void cpuid_remove_apic(struct kvm_cpuid *cpuid)
+{ /* Clears bit 9 of edx in the first entry of cpuid whose function is 1; does nothing if there is no such entry. */
+	int i;
+	struct kvm_cpuid_entry *e, *entry;
+
+	entry = NULL; /* stays NULL when no function-1 entry is found */
+	for (i = 0; i < cpuid->nent; ++i) {
+		e = &cpuid->entries[i];
+		if (e->function == 1) {
+			entry = e;
+			break; /* only the first match is modified */
+		}
+	}
+	if (entry) {
+		entry->edx &= ~(1 << 9); /* bit 9 of EDX: presumably the APIC feature flag, per the function name and message below */
+		printf("Guest APIC capibility removed\n");
+	}
+}
+
 int kvm_setup_cpuid(kvm_context_t kvm, int vcpu, int nent,
 		    struct kvm_cpuid_entry *entries)
 {
@@ -1034,6 +1079,8 @@ int kvm_setup_cpuid(kvm_context_t kvm, int vcpu, int nent,
 
 	cpuid->nent = nent;
 	memcpy(cpuid->entries, entries, nent * sizeof(*entries));
+	/* temporary workaround until the in-kernel APIC is merged */
+	cpuid_remove_apic(cpuid);
 	r = ioctl(kvm->vcpu_fd[vcpu], KVM_SET_CPUID, cpuid);
 
 	free(cpuid);
diff --git a/user/kvmctl.h b/user/kvmctl.h
index 956e194..68e1664 100644
--- a/user/kvmctl.h
+++ b/user/kvmctl.h
@@ -395,6 +395,7 @@ int kvm_destroy_memory_alias(kvm_context_t, int slot);
  * \param bitmap Long aligned address of a big enough bitmap (one bit per page)
  */
 int kvm_get_mem_map(kvm_context_t kvm, int slot, void *bitmap);
+int kvm_set_irq_level(kvm_context_t kvm, int irq, int level);
 
 /*!
  * \brief Enable dirty-pages-logging for all memory regions
