Dong, Eddie wrote:
> Avi:
> Per our discussion, we will only support all user level irqchip
> or all kernel level irqchip.
> Here is the patch against lapic2 that passed RHEL5U test. Please give
> comments.
>
>
> thx,eddie
>
>
General comments:
- please split into a hlt patch, lapic patch, and ioapic patch. the
last patch can enable the irqchip capability, but intermediate results
have to be compilable.
- document files that were taken from Xen or qemu; specify what
revision was used as base
More comments below.
>
>
>
> diff --git a/drivers/kvm/Makefile b/drivers/kvm/Makefile
> index 952dff3..b29651b 100644
> --- a/drivers/kvm/Makefile
> +++ b/drivers/kvm/Makefile
> @@ -2,7 +2,7 @@
> # Makefile for Kernel-based Virtual Machine module
> #
>
> -kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o irq.o
> +kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o kvm_irq.o lapic.o
> ioapic.o
>
irq.c was renamed to kvm_irq.c? why?
> +
> +static void ioapic_inj_irq(struct kvm_ioapic *ioapic,
> + struct kvm_lapic *target,
> + u8 vector, u8 trig_mode, u8 delivery_mode)
> +{
> + ioapic_debug("irq %d trig %d deliv %d", vector, trig_mode,
> + delivery_mode);
> +
> + ASSERT((delivery_mode == dest_Fixed) ||
> + (delivery_mode == dest_LowestPrio));
> +
> + if (kvm_apic_set_irq(target, vector, trig_mode))
> + kvm_vcpu_kick(target->vcpu);
> +}
>
Put kvm_vcpu_kick() into kvm_apic_set_irq() so that callers need not do
that themselves.
> +
> + switch (delivery_mode) {
> + case dest_LowestPrio:
>
Wierd constant. How about IOAPIC_DEST_LOWEST_PRIO?
> + target =
> + kvm_apic_round_robin(ioapic->kvm, vector,
> deliver_bitmask);
> + if (target != NULL) {
> + ioapic_inj_irq(ioapic, target, vector,
> + trig_mode, delivery_mode);
> + } else {
> + ioapic_debug("null round robin: "
> + "mask=%x vector=%x
> delivery_mode=%x",
> + deliver_bitmask, vector,
> dest_LowestPrio);
> + }
>
Unnecessary {}.
> +
> +static int get_eoi_gsi(struct kvm_ioapic *ioapic, int vector)
> +{
> + int i;
> +
> + for (i = 0; i < IOAPIC_NUM_PINS; i++)
> + if (ioapic->redirtbl[i].fields.vector == vector)
> + return i;
> +
> + return -1;
> +}
>
blank line.
> +void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
> +{
> + struct kvm_ioapic *ioapic = kvm->vioapic;
> + union ioapic_redir_entry *ent;
> + int gsi;
> +
> + gsi = get_eoi_gsi(ioapic, vector);
> + if (gsi == -1) {
> + printk(KERN_WARNING "Can't find redir item for %d
> EOI\n",
> + vector);
> + return;
> + }
> +
> + ent = &ioapic->redirtbl[gsi];
> + ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
> +
> + ent->fields.remote_irr = 0;
> + if (!ent->fields.mask && (ioapic->irr & (1 << gsi))) {
> + ioapic_deliver(ioapic, gsi);
> + }
>
excess braces.
> +
> +}
> +
> +static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr)
> +{
> + struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
> +
> + return ((addr >= ioapic->base_address &&
> + (addr < ioapic->base_address + IOAPIC_MEM_LENGTH)));
> +}
> +
> +static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr,
> int len,
> + void *val)
> +{
> + struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
> + u32 result;
> +
> + ioapic_debug("addr %lx", (unsigned long)addr);
> + ASSERT(!(addr & 0xf)); /* check alignment */
> +
> + addr &= 0xff;
> +
> + switch (addr) {
> + case IOAPIC_REG_SELECT:
> + result = ioapic->ioregsel;
> + break;
> +
> + case IOAPIC_REG_WINDOW:
> + result = ioapic_read_indirect(ioapic, addr, len);
> + break;
> +
> + default:
> + result = 0;
> + break;
> + }
> + switch (len) {
> + case 1:
> + case 2:
> + case 4:
> + case 8:
> + memcpy(val, (char *)&result, len);
>
If len == 8, you're copying a bit of kernel stack into the guest. While
it's hardly a security hole, we'd better not do that.
> +static void vcpu_kick_intr(void *info)
> +{
> +#ifdef DEBUG
> + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
> + printk(KERN_DEBUG "vcpu_kick_intr %p \n", vcpu);
> +#endif
> +}
> +
> +void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
> +{
> + int ipi_pcpu = vcpu->cpu;
> +
> + if (vcpu->guest_mode)
> + smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu,
> 0, 0);
> +}
>
What if it's in hlt state?
> +
> diff --git a/drivers/kvm/irq.h b/drivers/kvm/irq.h
> index a6b3869..4b1b4b7 100644
> --- a/drivers/kvm/irq.h
> +++ b/drivers/kvm/irq.h
> @@ -26,12 +26,11 @@
>
> typedef void irq_request_func(void *opaque, int level);
>
> -struct kvm_pic;
> struct kvm_pic_state {
> - u8 last_irr; /* edge detection */
> - u8 irr; /* interrupt request register */
> - u8 imr; /* interrupt mask register */
> - u8 isr; /* interrupt service register */
> + u8 last_irr; /* edge detection */
> + u8 irr; /* interrupt request register */
> + u8 imr; /* interrupt mask register */
> + u8 isr; /* interrupt service register */
> u8 priority_add; /* highest irq priority */
> u8 irq_base;
> u8 read_reg_select;
> @@ -48,7 +47,7 @@ struct kvm_pic_state {
> };
>
> struct kvm_pic {
> - struct kvm_pic_state pics[2]; /* 0 is master pic, 1 is slave pic
> */
> + struct kvm_pic_state pics[2]; /* 0 is master pic, 1 is slave
> pic */
> irq_request_func *irq_request;
> void *irq_request_opaque;
> int output; /* intr from master PIC */
>
Please separate the pic changes so I can fold them into the existing pic
patch.
> +
> +struct kvm_ioapic {
> + struct kvm_io_device dev;
> + unsigned long base_address;
> + struct kvm *kvm;
> + u32 ioregsel;
> + u32 id;
> + u32 irr;
> + union ioapic_redir_entry {
> + u64 bits;
> + struct {
> + u8 vector;
> + u8 delivery_mode:3;
> + u8 dest_mode:1;
> + u8 delivery_status:1;
> + u8 polarity:1;
> + u8 remote_irr:1;
> + u8 trig_mode:1;
> + u8 mask:1;
> + u8 reserve:7;
> + u8 reserved[4];
> + u8 dest_id;
> + } fields;
> + } redirtbl[IOAPIC_NUM_PINS];
> +};
>
Which lock protects this?
> +
> +struct kvm_lapic {
> + spinlock_t lock; /* TODO: need? */
>
I think not. Maybe when we have msi support?
> +
> +#ifdef DEBUG
> +#define ASSERT(x)
> \
> + if (!(x)) {
> \
> + printk(KERN_EMERG "assertion failed %s: %d: %s\n",
> \
> + __FILE__, __LINE__, #x);
> \
> + BUG();
> \
> + }
>
Wrap in a do { } while (0) to avoid surprises.
> #endif
> diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
> index f1a6773..a886ba9 100644
> --- a/drivers/kvm/kvm.h
> +++ b/drivers/kvm/kvm.h
> @@ -334,15 +334,13 @@ struct kvm_vcpu {
> };
> struct mutex mutex;
> int cpu;
> - int launched;
> + char vcpu_id;
>
There is already a vcpu_id in kvm.git...
> + char launched;
>
???
>
> +unsigned long get_cr8(struct kvm_vcpu *vcpu)
> +{
> + if (irqchip_in_kernel(vcpu->kvm))
> + return kvm_lapic_get_cr8(vcpu);
> + else
> + return vcpu->cr8;
> +}
> +EXPORT_SYMBOL_GPL(get_cr8);
>
How about keep vcpu->cr8 even with kernel lapic? then we don't need this.
> +
> +u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
> +{
> + if (irqchip_in_kernel(vcpu->kvm))
> + return vcpu->apic->base_msr;
> + else
> + return vcpu->apic_base;
> +}
> +EXPORT_SYMBOL_GPL(kvm_get_apic_base);
>
ditto.
> +
> +#define VEC_POS(v) ((v) & (32 - 1))
> +#define REG_POS(v) (((v) >> 5) << 4)
> +#define apic_test_and_set_vector(vec, bitmap) \
> + test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec))
> +#define apic_test_and_clear_vector(vec, bitmap) \
> + test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec))
> +#define apic_set_vector(vec, bitmap) \
> + set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec))
> +#define apic_clear_vector(vec, bitmap) \
> + clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec))
> +
> +#define apic_hw_enabled(apic) ((apic)->base_msr &
> MSR_IA32_APICBASE_ENABLE)
> +#define apic_sw_enabled(apic) (!((apic)->status & APIC_SW_DISABLE))
> +#define apic_enabled(apic) (apic_sw_enabled(apic) && \
> + apic_hw_enabled(apic))
>
These would be better as inline functions (type checking, etc.)
> +
> +#define KVM_APIC_ID(apic) \
> + (GET_APIC_ID(apic_get_reg(apic, APIC_ID)))
> +
> +#define apic_lvt_enabled(apic, lvt_type) \
> + (!(apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED))
> +
> +#define apic_lvt_vector(apic, lvt_type) \
> + (apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK)
> +
> +#define apic_lvt_dm(apic, lvt_type) \
> + (apic_get_reg(apic, lvt_type) & APIC_MODE_MASK)
> +
> +#define apic_lvtt_period(apic) \
> + (apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC)
>
more functions
Where are the state save/restore? they can be added later, so long as
the capability is enabled only after everything is working.
--
Do not meddle in the internals of kernels, for they are subtle and quick to
panic.
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
_______________________________________________
kvm-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/kvm-devel