Dong, Eddie wrote:
> Avi:
>       Per our discussion, we will only support all user level irqchip
> or all kernel level irqchip.  
> Here is the patch against lapic2 that passed RHEL5U test. Please give
> comments.
>
>
> thx,eddie
>
>   

General comments:

 - please split into a hlt patch, lapic patch, and ioapic patch.  the
last patch can enable the irqchip capability, but intermediate results
have to be compilable.
 - document files that were taken from Xen or qemu; specify what
revision was used as base


More comments below.

>
>
>
> diff --git a/drivers/kvm/Makefile b/drivers/kvm/Makefile
> index 952dff3..b29651b 100644
> --- a/drivers/kvm/Makefile
> +++ b/drivers/kvm/Makefile
> @@ -2,7 +2,7 @@
>  # Makefile for Kernel-based Virtual Machine module
>  #
>  
> -kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o irq.o
> +kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o kvm_irq.o lapic.o
> ioapic.o
>   

irq.c was renamed to kvm_irq.c? why?

> +
> +static void ioapic_inj_irq(struct kvm_ioapic *ioapic,
> +                        struct kvm_lapic *target,
> +                        u8 vector, u8 trig_mode, u8 delivery_mode)
> +{
> +     ioapic_debug("irq %d trig %d deliv %d", vector, trig_mode,
> +                  delivery_mode);
> +
> +     ASSERT((delivery_mode == dest_Fixed) ||
> +            (delivery_mode == dest_LowestPrio));
> +
> +     if (kvm_apic_set_irq(target, vector, trig_mode))
> +             kvm_vcpu_kick(target->vcpu);
> +}
>   

Put kvm_vcpu_kick() into kvm_apic_set_irq() so that callers need not do
that themselves.

> +
> +     switch (delivery_mode) {
> +     case dest_LowestPrio:
>   

Wierd constant.  How about IOAPIC_DEST_LOWEST_PRIO?

> +             target =
> +                 kvm_apic_round_robin(ioapic->kvm, vector,
> deliver_bitmask);
> +             if (target != NULL) {
> +                     ioapic_inj_irq(ioapic, target, vector,
> +                                    trig_mode, delivery_mode);
> +             } else {
> +                     ioapic_debug("null round robin: "
> +                                  "mask=%x vector=%x
> delivery_mode=%x",
> +                                  deliver_bitmask, vector,
> dest_LowestPrio);
> +             }
>   

Unnecessary {}.

> +
> +static int get_eoi_gsi(struct kvm_ioapic *ioapic, int vector)
> +{
> +     int i;
> +
> +     for (i = 0; i < IOAPIC_NUM_PINS; i++)
> +             if (ioapic->redirtbl[i].fields.vector == vector)
> +                     return i;
> +
> +     return -1;
> +}
>   

blank line.

> +void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
> +{
> +     struct kvm_ioapic *ioapic = kvm->vioapic;
> +     union ioapic_redir_entry *ent;
> +     int gsi;
> +
> +     gsi = get_eoi_gsi(ioapic, vector);
> +     if (gsi == -1) {
> +             printk(KERN_WARNING "Can't find redir item for %d
> EOI\n",
> +                    vector);
> +             return;
> +     }
> +
> +     ent = &ioapic->redirtbl[gsi];
> +     ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
> +
> +     ent->fields.remote_irr = 0;
> +     if (!ent->fields.mask && (ioapic->irr & (1 << gsi))) {
> +             ioapic_deliver(ioapic, gsi);
> +     }
>   

excess braces.


> +
> +}
> +
> +static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr)
> +{
> +     struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
> +
> +     return ((addr >= ioapic->base_address &&
> +              (addr < ioapic->base_address + IOAPIC_MEM_LENGTH)));
> +}
> +
> +static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr,
> int len,
> +                          void *val)
> +{
> +     struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
> +     u32 result;
> +
> +     ioapic_debug("addr %lx", (unsigned long)addr);
> +     ASSERT(!(addr & 0xf));  /* check alignment */
> +
> +     addr &= 0xff;
> +
> +     switch (addr) {
> +     case IOAPIC_REG_SELECT:
> +             result = ioapic->ioregsel;
> +             break;
> +
> +     case IOAPIC_REG_WINDOW:
> +             result = ioapic_read_indirect(ioapic, addr, len);
> +             break;
> +
> +     default:
> +             result = 0;
> +             break;
> +     }
> +     switch (len) {
> +     case 1:
> +     case 2:
> +     case 4:
> +     case 8:
> +             memcpy(val, (char *)&result, len);
>   

If len == 8, you're copying a bit of kernel stack into the guest.  While
it's hardly a security hole, we'd better not do that.

> +static void vcpu_kick_intr(void *info)
> +{
> +#ifdef DEBUG
> +     struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
> +     printk(KERN_DEBUG "vcpu_kick_intr %p \n", vcpu);
> +#endif
> +}
> +
> +void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
> +{
> +     int ipi_pcpu = vcpu->cpu;
> +
> +     if (vcpu->guest_mode)
> +             smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu,
> 0, 0);
> +}
>   

What if it's in hlt state?

> +
> diff --git a/drivers/kvm/irq.h b/drivers/kvm/irq.h
> index a6b3869..4b1b4b7 100644
> --- a/drivers/kvm/irq.h
> +++ b/drivers/kvm/irq.h
> @@ -26,12 +26,11 @@
>  
>  typedef void irq_request_func(void *opaque, int level);
>  
> -struct kvm_pic;
>  struct kvm_pic_state {
> -     u8 last_irr;    /* edge detection */
> -     u8 irr;         /* interrupt request register */
> -     u8 imr;         /* interrupt mask register */
> -     u8 isr;         /* interrupt service register */
> +     u8 last_irr;            /* edge detection */
> +     u8 irr;                 /* interrupt request register */
> +     u8 imr;                 /* interrupt mask register */
> +     u8 isr;                 /* interrupt service register */
>       u8 priority_add;        /* highest irq priority */
>       u8 irq_base;
>       u8 read_reg_select;
> @@ -48,7 +47,7 @@ struct kvm_pic_state {
>  };
>  
>  struct kvm_pic {
> -     struct kvm_pic_state pics[2]; /* 0 is master pic, 1 is slave pic
> */
> +     struct kvm_pic_state pics[2];   /* 0 is master pic, 1 is slave
> pic */
>       irq_request_func *irq_request;
>       void *irq_request_opaque;
>       int output;             /* intr from master PIC */
>   

Please separate the pic changes so I can fold them into the existing pic
patch.

> +
> +struct kvm_ioapic {
> +     struct kvm_io_device dev;
> +     unsigned long base_address;
> +     struct kvm *kvm;
> +     u32 ioregsel;
> +     u32 id;
> +     u32 irr;
> +     union ioapic_redir_entry {
> +             u64 bits;
> +             struct {
> +                     u8 vector;
> +                     u8 delivery_mode:3;
> +                     u8 dest_mode:1;
> +                     u8 delivery_status:1;
> +                     u8 polarity:1;
> +                     u8 remote_irr:1;
> +                     u8 trig_mode:1;
> +                     u8 mask:1;
> +                     u8 reserve:7;
> +                     u8 reserved[4];
> +                     u8 dest_id;
> +             } fields;
> +     } redirtbl[IOAPIC_NUM_PINS];
> +};
>   

Which lock protects this?

> +
> +struct kvm_lapic {
> +     spinlock_t lock;        /* TODO: need? */
>   

I think not.  Maybe when we have msi support?

> +
> +#ifdef DEBUG
> +#define ASSERT(x)
> \
> +     if (!(x)) {
> \
> +             printk(KERN_EMERG "assertion failed %s: %d: %s\n",
> \
> +                    __FILE__, __LINE__, #x);
> \
> +             BUG();
> \
> +     }
>   

Wrap in a do { } while (0) to avoid surprises.


>  #endif
> diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
> index f1a6773..a886ba9 100644
> --- a/drivers/kvm/kvm.h
> +++ b/drivers/kvm/kvm.h
> @@ -334,15 +334,13 @@ struct kvm_vcpu {
>       };
>       struct mutex mutex;
>       int   cpu;
> -     int   launched;
> +     char  vcpu_id;
>   

There is already a vcpu_id in kvm.git...

> +     char  launched;
>   


???

>  
> +unsigned long get_cr8(struct kvm_vcpu *vcpu)
> +{
> +     if (irqchip_in_kernel(vcpu->kvm))
> +             return kvm_lapic_get_cr8(vcpu);
> +     else
> +             return vcpu->cr8;
> +}
> +EXPORT_SYMBOL_GPL(get_cr8);
>   

How about keep vcpu->cr8 even with kernel lapic?  then we don't need this.

> +
> +u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
> +{
> +     if (irqchip_in_kernel(vcpu->kvm))
> +             return vcpu->apic->base_msr;
> +     else
> +             return vcpu->apic_base;
> +}
> +EXPORT_SYMBOL_GPL(kvm_get_apic_base);
>   

ditto.

> +
> +#define VEC_POS(v) ((v) & (32 - 1))
> +#define REG_POS(v) (((v) >> 5) << 4)
> +#define apic_test_and_set_vector(vec, bitmap)                 \
> +    test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec))
> +#define apic_test_and_clear_vector(vec, bitmap)               \
> +    test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec))
> +#define apic_set_vector(vec, bitmap)                          \
> +    set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec))
> +#define apic_clear_vector(vec, bitmap)                        \
> +    clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec))
> +
> +#define apic_hw_enabled(apic)        ((apic)->base_msr &
> MSR_IA32_APICBASE_ENABLE)
> +#define apic_sw_enabled(apic)        (!((apic)->status & APIC_SW_DISABLE))
> +#define apic_enabled(apic)   (apic_sw_enabled(apic) &&       \
> +     apic_hw_enabled(apic))
>   

These would be better as inline functions (type checking, etc.)

> +
> +#define KVM_APIC_ID(apic)    \
> +     (GET_APIC_ID(apic_get_reg(apic, APIC_ID)))
> +
> +#define apic_lvt_enabled(apic, lvt_type)     \
> +     (!(apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED))
> +
> +#define apic_lvt_vector(apic, lvt_type)              \
> +     (apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK)
> +
> +#define apic_lvt_dm(apic, lvt_type)  \
> +     (apic_get_reg(apic, lvt_type) & APIC_MODE_MASK)
> +
> +#define apic_lvtt_period(apic)       \
> +     (apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC)
>   

more functions


Where are the state save/restore?  they can be added later, so long as
the capability is enabled only after everything is working.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.


-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
_______________________________________________
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel

Reply via email to