Gregory Haskins wrote:
> The current code is geared towards using a user-mode (A)PIC. This patch adds
> an "irqdevice" abstraction, and implements a "userint" model to handle the
> duties of the original code. Later, we can develop other irqdevice models
> to handle objects like LAPIC, IOAPIC, i8259, etc, as appropriate
>
> +
> +typedef enum {
> + kvm_irqpin_localint,
> + kvm_irqpin_extint,
> + kvm_irqpin_smi,
> + kvm_irqpin_nmi,
> + kvm_irqpin_invalid, /* must always be last */
> +}kvm_irqpin_t;
>
This describes the processor irq pins, as opposed to an interrupt
controller's irq pins, yes? If so, let the name reflect that (and let
there be a space after the closing brace).
> +
> +#define KVM_IRQACK_VALID (1 << 0)
> +#define KVM_IRQACK_AGAIN (1 << 1)
> +#define KVM_IRQACK_TPRMASK (1 << 2)
> +
> +struct kvm_irqsink {
> + void (*set_intr)(struct kvm_irqsink *this,
> + struct kvm_irqdevice *dev,
> + kvm_irqpin_t pin, int trigger, int value);
> +
> + void *private;
> +};
> +
> +struct kvm_irqdevice {
> + int (*ack)(struct kvm_irqdevice *this, int *vector);
> + int (*set_pin)(struct kvm_irqdevice *this, int pin, int level);
> + int (*summary)(struct kvm_irqdevice *this, void *data);
> + void (*destructor)(struct kvm_irqdevice *this);
>
[do we actually need a virtual destructor?]
> +/**
> + * kvm_irqdevice_ack - read and ack the highest priority vector from the
> device
> + * @dev: The device
> + * @vector: Retrieves the highest priority pending vector
> + * [ NULL = Dont ack a vector, just check pending status]
> + * [ non-NULL = Pointer to recieve vector data (out only)]
> + *
> + * Description: Read the highest priority pending vector from the device,
> + * potentially invoking auto-EOI depending on device policy
> + *
> + * Returns: (int)
> + * [ -1 = failure]
> + * [>=0 = bitmap as follows: ]
> + * [ KVM_IRQACK_VALID = vector is valid]
> + * [ KVM_IRQACK_AGAIN = more unmasked vectors are available]
> + * [ KVM_IRQACK_TPRMASK = TPR masked vectors are blocked]
> + */
> +static inline int kvm_irqdevice_ack(struct kvm_irqdevice *dev,
> + int *vector)
> +{
> + return dev->ack(dev, vector);
> +}
>
This is an improvement over the previous patch, but I'm vaguely
disturbed by the complexity of the return code. I don't have an
alternative to suggest at this time, though.
> +
> +/**
> + * kvm_irqdevice_summary - loads a summary bitmask
> + * @dev: The device
> + * @data: A pointer to a region capable of holding a 256 bit bitmap
> + *
> + * Description: Loads a summary bitmask of all pending vectors (0-255)
> + *
> + * Returns: (int)
> + * [-1 = failure]
> + * [ 0 = success]
> + */
> +static inline int kvm_irqdevice_summary(struct kvm_irqdevice *dev, void
> *data)
> +{
> + return dev->summary(dev, data);
> +}
>
This really works only for the userint case. It can be dropped from the
generic interface IMO. Each interrupt controller will have its own save
restore interface which userspace will have to know about (as it has to
know about configuring the interrupt controller).
> +/**
> + * kvm_irqdevice_set_intr - invokes a registered INTR callback
> + * @dev: The device
> + * @pin: Identifies the pin to alter -
> + * [ KVM_IRQPIN_LOCALINT (default) - an vector is pending on this
> + * device]
> + * [ KVM_IRQPIN_EXTINT - a vector is pending on an external device]
> + * [ KVM_IRQPIN_SMI - system-management-interrupt pin]
> + * [ KVM_IRQPIN_NMI - non-maskable-interrupt pin
> + * @trigger: sensitivity [0 = edge, 1 = level]
> + * @val: [0 = deassert (ignored for edge-trigger), 1 = assert]
> + *
> + * Description: Invokes a registered INTR callback (if present). This
> + * function is meant to be used privately by a irqdevice
> + * implementation.
> + *
> + * Returns: (void)
> + */
> +static inline void kvm_irqdevice_set_intr(struct kvm_irqdevice *dev,
> + kvm_irqpin_t pin, int trigger,
> + int val)
> +{
> + struct kvm_irqsink *sink = &dev->sink;
> + if (sink->set_intr)
> + sink->set_intr(sink, dev, pin, trigger, val);
> +}
>
Do you see more than one implementation for ->set_intr (e.g. for
cascading)? If not, it can be de-pointered.
Shouldn't 'trigger' be part of the pin configuration rather than passed
on every invocation?
>
> +/*
> + * Assumes lock already held
> + */
> +static inline int __kvm_vcpu_irq_all_pending(struct kvm_vcpu *vcpu)
> +{
> + int pending = vcpu->irq.pending;
> +
> + if (vcpu->irq.deferred != -1)
> + __set_bit(kvm_irqpin_localint, &pending);
> +
> + return pending;
> +}
> +
> +/*
> + * These two functions are helpers for determining if a standard interrupt
> + * is pending to replace the old "if (vcpu->irq_summary)" logic. If the
> + * caller wants to know about some of the new advanced interrupt types
> + * (SMI, NMI, etc) or to differentiate between localint and extint they will
> + * have to use the new API
> + */
> +static inline int __kvm_vcpu_irq_pending(struct kvm_vcpu *vcpu)
> +{
> + int pending = __kvm_vcpu_irq_all_pending(vcpu);
> +
> + if (test_bit(kvm_irqpin_localint, &pending) ||
> + test_bit(kvm_irqpin_extint, &pending))
> + return 1;
> +
> + return 0;
> +}
> +
> +static inline int kvm_vcpu_irq_pending(struct kvm_vcpu *vcpu)
> +{
> + int ret = 0;
> + int flags;
> +
> + spin_lock_irqsave(&vcpu->irq.lock, flags);
> + ret = __kvm_vcpu_irq_pending(vcpu);
> + spin_unlock_irqrestore(&vcpu->irq.lock, flags);
>
The locking seems superfluous.
> +
> + return ret;
> +}
> +
> +/*
> + * Assumes lock already held
> + */
> +static inline int kvm_vcpu_irq_pop(struct kvm_vcpu *vcpu, int *vector)
> +{
> + int ret = 0;
> +
> + if (vcpu->irq.deferred != -1) {
> + if (vector) {
> + ret |= KVM_IRQACK_VALID;
> + *vector = vcpu->irq.deferred;
> + vcpu->irq.deferred = -1;
> + }
> + ret |= kvm_irqdevice_ack(&vcpu->irq.dev, NULL);
> + } else
> + ret = kvm_irqdevice_ack(&vcpu->irq.dev, vector);
> +
> + /*
> + * If there are no more interrupts and we are edge triggered,
> + * we must clear the status flag
> + */
> + if (!(ret & KVM_IRQACK_AGAIN))
> + __clear_bit(kvm_irqpin_localint, &vcpu->irq.pending);
>
Can localint actually be edge-triggered?
> +
> + return ret;
> +}
> +
> +static inline void __kvm_vcpu_irq_push(struct kvm_vcpu *vcpu, int irq)
> +{
> + BUG_ON(vcpu->irq.deferred != -1); /* We can only hold one deferred */
> +
> + vcpu->irq.deferred = irq;
> +}
> +
> +static inline void kvm_vcpu_irq_push(struct kvm_vcpu *vcpu, int irq)
> +{
> + int flags;
> +
> + spin_lock_irqsave(&vcpu->irq.lock, flags);
> + __kvm_vcpu_irq_push(vcpu, irq);
> + spin_unlock_irqrestore(&vcpu->irq.lock, flags);
> +}
> +
>
Can you explain the logic behind push()/pop()? I realize you inherited
it, but I don't think it fits well into the new model.
> @@ -2044,13 +2048,17 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu
> *vcpu,
> if (mmu_reset_needed)
> kvm_mmu_reset_context(vcpu);
>
> - memcpy(vcpu->irq_pending, sregs->interrupt_bitmap,
> - sizeof vcpu->irq_pending);
> - vcpu->irq_summary = 0;
> - for (i = 0; i < NR_IRQ_WORDS; ++i)
> - if (vcpu->irq_pending[i])
> - __set_bit(i, &vcpu->irq_summary);
> -
> + /*
> + * walk the interrupt-bitmap and inject an IRQ for each bit found
> + *
> + * note that we skip the first 16 vectors since they are reserved
> + * and should never be set by an interrupt source
> + */
> + for (i = 16; i < 256; ++i) {
> + int val = test_bit(i, &sregs->interrupt_bitmap[0]);
> + kvm_irqdevice_set_pin(&vcpu->irq.dev, i, val);
> + }
> +
>
Theory vs. practice. The bios will set the first pic to vectors 8-15.
> @@ -2319,6 +2321,51 @@ out1:
> }
>
> /*
> + * This function will be invoked whenever the vcpu->irq.dev raises its INTR
> + * line
> + */
> +static void kvm_vcpu_intr(struct kvm_irqsink *this,
> + struct kvm_irqdevice *dev,
> + kvm_irqpin_t pin, int trigger, int val)
> +{
> + struct kvm_vcpu *vcpu = (struct kvm_vcpu*)this->private;
> + unsigned long flags;
> +
> + spin_lock_irqsave(&vcpu->irq.lock, flags);
> +
> + if (val && !test_bit(pin, &vcpu->irq.pending)) {
> + /*
> + * if the line is being asserted and we currently have
> + * it deasserted, we must record
> + */
> + __set_bit(pin, &vcpu->irq.pending);
> +
> + if (trigger)
> + __set_bit(pin, &vcpu->irq.trigger);
> + else
> + __clear_bit(pin, &vcpu->irq.trigger);
> +
> + } else if (!val && trigger)
> + /*
> + * if the level-sensitive line is being deasserted,
> + * record it.
> + */
> + __clear_bit(pin, &vcpu->irq.pending);
> +
> + spin_unlock_irqrestore(&vcpu->irq.lock, flags);
> +}
>
That's quite a mouthful :)
> * Creates some virtual cpus. Good luck creating more than one.
> */
> static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
> @@ -2364,6 +2411,12 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm,
> int n)
> if (r < 0)
> goto out_free_vcpus;
>
> + kvm_irqdevice_init(&vcpu->irq.dev);
> + kvm_vcpu_irqsink_init(vcpu);
> + r = kvm_userint_init(vcpu);
> + if (r < 0)
> + goto out_free_vcpus;
>
Bad indent.
> static inline void clgi(void)
> {
> asm volatile (SVM_CLGI);
> @@ -892,7 +874,12 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct
> kvm_run *kvm_run)
> int r;
>
> if (is_external_interrupt(exit_int_info))
> - push_irq(vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK);
> + /*
> + * An exception was taken while we were trying to inject an
> + * IRQ. We must defer the injection of the vector until
> + * the next window.
> + */
> + kvm_vcpu_irq_push(vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK);
>
Ah, I remember what push/pop is for now. We actually have ->ack() to
deal with this now. Unfortunately with auto-eoi we don't have a good
place to call it. So push() is a kind of unack() for eoi interrupts.
> @ -1434,7 +1482,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
> static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
> struct kvm_run *kvm_run)
> {
> - return (!vcpu->irq_summary &&
> + return (!kvm_vcpu_irq_pending(vcpu) &&
>
whoops.
Overall this seems to be improving, but I'm concerned about the much
increased complexity of it all. Probably much of it is unavoidable, but
I'd like not to see any unnecessary stuff as debugging in this area is
pretty much impossible.
--
error compiling committee.c: too many arguments to function
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
_______________________________________________
kvm-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/kvm-devel