Dong, Eddie wrote: > Avi: > Per our discussion, we will only support all user level irqchip > or all kernel level irqchip. > Here is the patch against lapic2 that passed RHEL5U test. Please give > comments. > > > thx,eddie > >
General comments: - please split into a hlt patch, lapic patch, and ioapic patch. the last patch can enable the irqchip capability, but intermediate results have to be compilable. - document files that were taken from Xen or qemu; specify what revision was used as base More comments below. > > > > diff --git a/drivers/kvm/Makefile b/drivers/kvm/Makefile > index 952dff3..b29651b 100644 > --- a/drivers/kvm/Makefile > +++ b/drivers/kvm/Makefile > @@ -2,7 +2,7 @@ > # Makefile for Kernel-based Virtual Machine module > # > > -kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o irq.o > +kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o kvm_irq.o lapic.o > ioapic.o > irq.c was renamed to kvm_irq.c? why? > + > +static void ioapic_inj_irq(struct kvm_ioapic *ioapic, > + struct kvm_lapic *target, > + u8 vector, u8 trig_mode, u8 delivery_mode) > +{ > + ioapic_debug("irq %d trig %d deliv %d", vector, trig_mode, > + delivery_mode); > + > + ASSERT((delivery_mode == dest_Fixed) || > + (delivery_mode == dest_LowestPrio)); > + > + if (kvm_apic_set_irq(target, vector, trig_mode)) > + kvm_vcpu_kick(target->vcpu); > +} > Put kvm_vcpu_kick() into kvm_apic_set_irq() so that callers need not do that themselves. > + > + switch (delivery_mode) { > + case dest_LowestPrio: > Wierd constant. How about IOAPIC_DEST_LOWEST_PRIO? > + target = > + kvm_apic_round_robin(ioapic->kvm, vector, > deliver_bitmask); > + if (target != NULL) { > + ioapic_inj_irq(ioapic, target, vector, > + trig_mode, delivery_mode); > + } else { > + ioapic_debug("null round robin: " > + "mask=%x vector=%x > delivery_mode=%x", > + deliver_bitmask, vector, > dest_LowestPrio); > + } > Unnecessary {}. > + > +static int get_eoi_gsi(struct kvm_ioapic *ioapic, int vector) > +{ > + int i; > + > + for (i = 0; i < IOAPIC_NUM_PINS; i++) > + if (ioapic->redirtbl[i].fields.vector == vector) > + return i; > + > + return -1; > +} > blank line. > +void kvm_ioapic_update_eoi(struct kvm *kvm, int vector) > +{ > + struct kvm_ioapic *ioapic = kvm->vioapic; > + union ioapic_redir_entry *ent; > + int gsi; > + > + gsi = get_eoi_gsi(ioapic, vector); > + if (gsi == -1) { > + printk(KERN_WARNING "Can't find redir item for %d > EOI\n", > + vector); > + return; > + } > + > + ent = &ioapic->redirtbl[gsi]; > + ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); > + > + ent->fields.remote_irr = 0; > + if (!ent->fields.mask && (ioapic->irr & (1 << gsi))) { > + ioapic_deliver(ioapic, gsi); > + } > excess braces. > + > +} > + > +static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr) > +{ > + struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private; > + > + return ((addr >= ioapic->base_address && > + (addr < ioapic->base_address + IOAPIC_MEM_LENGTH))); > +} > + > +static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, > int len, > + void *val) > +{ > + struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private; > + u32 result; > + > + ioapic_debug("addr %lx", (unsigned long)addr); > + ASSERT(!(addr & 0xf)); /* check alignment */ > + > + addr &= 0xff; > + > + switch (addr) { > + case IOAPIC_REG_SELECT: > + result = ioapic->ioregsel; > + break; > + > + case IOAPIC_REG_WINDOW: > + result = ioapic_read_indirect(ioapic, addr, len); > + break; > + > + default: > + result = 0; > + break; > + } > + switch (len) { > + case 1: > + case 2: > + case 4: > + case 8: > + memcpy(val, (char *)&result, len); > If len == 8, you're copying a bit of kernel stack into the guest. While it's hardly a security hole, we'd better not do that. > +static void vcpu_kick_intr(void *info) > +{ > +#ifdef DEBUG > + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info; > + printk(KERN_DEBUG "vcpu_kick_intr %p \n", vcpu); > +#endif > +} > + > +void kvm_vcpu_kick(struct kvm_vcpu *vcpu) > +{ > + int ipi_pcpu = vcpu->cpu; > + > + if (vcpu->guest_mode) > + smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, > 0, 0); > +} > What if it's in hlt state? > + > diff --git a/drivers/kvm/irq.h b/drivers/kvm/irq.h > index a6b3869..4b1b4b7 100644 > --- a/drivers/kvm/irq.h > +++ b/drivers/kvm/irq.h > @@ -26,12 +26,11 @@ > > typedef void irq_request_func(void *opaque, int level); > > -struct kvm_pic; > struct kvm_pic_state { > - u8 last_irr; /* edge detection */ > - u8 irr; /* interrupt request register */ > - u8 imr; /* interrupt mask register */ > - u8 isr; /* interrupt service register */ > + u8 last_irr; /* edge detection */ > + u8 irr; /* interrupt request register */ > + u8 imr; /* interrupt mask register */ > + u8 isr; /* interrupt service register */ > u8 priority_add; /* highest irq priority */ > u8 irq_base; > u8 read_reg_select; > @@ -48,7 +47,7 @@ struct kvm_pic_state { > }; > > struct kvm_pic { > - struct kvm_pic_state pics[2]; /* 0 is master pic, 1 is slave pic > */ > + struct kvm_pic_state pics[2]; /* 0 is master pic, 1 is slave > pic */ > irq_request_func *irq_request; > void *irq_request_opaque; > int output; /* intr from master PIC */ > Please separate the pic changes so I can fold them into the existing pic patch. > + > +struct kvm_ioapic { > + struct kvm_io_device dev; > + unsigned long base_address; > + struct kvm *kvm; > + u32 ioregsel; > + u32 id; > + u32 irr; > + union ioapic_redir_entry { > + u64 bits; > + struct { > + u8 vector; > + u8 delivery_mode:3; > + u8 dest_mode:1; > + u8 delivery_status:1; > + u8 polarity:1; > + u8 remote_irr:1; > + u8 trig_mode:1; > + u8 mask:1; > + u8 reserve:7; > + u8 reserved[4]; > + u8 dest_id; > + } fields; > + } redirtbl[IOAPIC_NUM_PINS]; > +}; > Which lock protects this? > + > +struct kvm_lapic { > + spinlock_t lock; /* TODO: need? */ > I think not. Maybe when we have msi support? > + > +#ifdef DEBUG > +#define ASSERT(x) > \ > + if (!(x)) { > \ > + printk(KERN_EMERG "assertion failed %s: %d: %s\n", > \ > + __FILE__, __LINE__, #x); > \ > + BUG(); > \ > + } > Wrap in a do { } while (0) to avoid surprises. > #endif > diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h > index f1a6773..a886ba9 100644 > --- a/drivers/kvm/kvm.h > +++ b/drivers/kvm/kvm.h > @@ -334,15 +334,13 @@ struct kvm_vcpu { > }; > struct mutex mutex; > int cpu; > - int launched; > + char vcpu_id; > There is already a vcpu_id in kvm.git... > + char launched; > ??? > > +unsigned long get_cr8(struct kvm_vcpu *vcpu) > +{ > + if (irqchip_in_kernel(vcpu->kvm)) > + return kvm_lapic_get_cr8(vcpu); > + else > + return vcpu->cr8; > +} > +EXPORT_SYMBOL_GPL(get_cr8); > How about keep vcpu->cr8 even with kernel lapic? then we don't need this. > + > +u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) > +{ > + if (irqchip_in_kernel(vcpu->kvm)) > + return vcpu->apic->base_msr; > + else > + return vcpu->apic_base; > +} > +EXPORT_SYMBOL_GPL(kvm_get_apic_base); > ditto. > + > +#define VEC_POS(v) ((v) & (32 - 1)) > +#define REG_POS(v) (((v) >> 5) << 4) > +#define apic_test_and_set_vector(vec, bitmap) \ > + test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)) > +#define apic_test_and_clear_vector(vec, bitmap) \ > + test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)) > +#define apic_set_vector(vec, bitmap) \ > + set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)) > +#define apic_clear_vector(vec, bitmap) \ > + clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)) > + > +#define apic_hw_enabled(apic) ((apic)->base_msr & > MSR_IA32_APICBASE_ENABLE) > +#define apic_sw_enabled(apic) (!((apic)->status & APIC_SW_DISABLE)) > +#define apic_enabled(apic) (apic_sw_enabled(apic) && \ > + apic_hw_enabled(apic)) > These would be better as inline functions (type checking, etc.) > + > +#define KVM_APIC_ID(apic) \ > + (GET_APIC_ID(apic_get_reg(apic, APIC_ID))) > + > +#define apic_lvt_enabled(apic, lvt_type) \ > + (!(apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED)) > + > +#define apic_lvt_vector(apic, lvt_type) \ > + (apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK) > + > +#define apic_lvt_dm(apic, lvt_type) \ > + (apic_get_reg(apic, lvt_type) & APIC_MODE_MASK) > + > +#define apic_lvtt_period(apic) \ > + (apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC) > more functions Where are the state save/restore? they can be added later, so long as the capability is enabled only after everything is working. -- Do not meddle in the internals of kernels, for they are subtle and quick to panic. ------------------------------------------------------------------------- This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ _______________________________________________ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel