Thank you for your comments.
I agree with all.
I'll move my code to emulator_write_emulated_onepage().
I'll use the kvm_io_device mechanism: is there an existing ioctl() to
register an MMIO zone?
I'll add a KVM_CAP_ for this feature.
Regards,
Laurent
Le dimanche 18 mai 2008 à 10:39 +0300, Avi Kivity a écrit :
> [resend to new list]
>
> Laurent Vivier wrote:
> > This patch is the kernel part of the "batch writes to MMIO" patch.
> >
> > It introduces the ioctl interface to define an MMIO zone it is allowed to delay.
> > Inside a zone, we can define sub-part we must not delay.
> >
> > If an MMIO can be delayed, it is stored in a ring buffer which is common to
> > all VCPUs.
> >
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index dab3d4f..930986b 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -1518,6 +1518,103 @@ out:
> > return r;
> > }
> >
> > +static struct kvm_delayed_mmio_zone *kvm_mmio_find_zone(struct kvm *kvm,
> > + u64 addr, u32 size)
> >
>
> gpa_t addr
>
> > +{
> > + int i;
> > + struct kvm_delayed_mmio_zone *zone;
> > +
> > + for (i = 0; i < kvm->arch.nb_mmio_zones; i++) {
> > + zone = &kvm->arch.mmio_zone[i];
> > +
> > + /* (addr,size) is fully included in
> > + * (zone->addr, zone->size)
> > + */
> > +
> > + if (zone->addr <= addr &&
> > + addr + size <= zone->addr + zone->size)
> > + return zone;
> > + }
> > + return NULL;
> > +}
> > +
> > +static struct kvm_excluded_mmio_zone *
> > +kvm_mmio_find_excluded(struct kvm_delayed_mmio_zone *zone, u64 addr, u32
> > size)
> > +{
> > + static struct kvm_excluded_mmio_zone *excluded;
> > + int i;
> > +
> > + addr -= zone->addr;
> > + for (i = 0; i < zone->nb_excluded_zones; i++) {
> > + excluded = &zone->excluded[i];
> > +
> > + if ((excluded->offset <= addr &&
> > + addr < excluded->offset + excluded->size) ||
> > + (excluded->offset < addr + size &&
> > + addr + size <= excluded->offset +
> > + excluded->size))
> > + return excluded;
> > + }
> > + return NULL;
> > +}
> > +
> >
>
> Instead of included/excluded zones (with the default excluded), I
> suggest having just included zones (with the default excluded). If the
> user specifies an excluded zone within an included zone, simply split
> the included zone into two:
>
> [100, 200) + ^[150, 160) = [100, 150) + [160, 200)
>
> Or maybe, push all this logic to userspace and only allow adding include
> zones (and clearing previously set include zones).
>
> > +static int kvm_vm_ioctl_set_mmio(struct kvm *kvm,
> > + struct kvm_mmio_zone *zone)
> >
>
> "mmio" is too generic a name. Please use delayed mmio or batched mmio
> consistently.
>
> > +{
> > + struct kvm_delayed_mmio_zone *z;
> > +
> > + if (zone->is_delayed &&
> > + kvm->arch.nb_mmio_zones >= KVM_MAX_DELAYED_MMIO_ZONE)
> > + return -ENOMEM;
> >
>
> -ENOBUFS. -ENOMEM is generally not recoverable, ENOBUFS means just this
> specific operation failed.
>
> > +
> > + if (zone->is_delayed) {
> > +
> > + /* already defined ? */
> > +
> > + if (kvm_mmio_find_zone(kvm, zone->addr, 1) ||
> > + kvm_mmio_find_zone(kvm, zone->addr + zone->size - 1, 1))
> > + return 0;
> > +
> > + z = &kvm->arch.mmio_zone[kvm->arch.nb_mmio_zones];
> > + z->addr = zone->addr;
> > + z->size = zone->size;
> > + kvm->arch.nb_mmio_zones++;
> > + return 0;
> > + }
> > +
> > + /* exclude some parts of the delayed MMIO zone */
> > +
> > + z = kvm_mmio_find_zone(kvm, zone->addr, zone->size);
> > + if (z == NULL)
> > + return -EINVAL;
> > +
> > + if (z->nb_excluded_zones >= KVM_MAX_EXCLUDED_MMIO_ZONE)
> > + return -ENOMEM;
> > +
> > + if (kvm_mmio_find_excluded(z, zone->addr, 1) ||
> > + kvm_mmio_find_excluded(z, zone->addr + zone->size - 1, 1))
> > + return 0;
> > +
> > + z->excluded[z->nb_excluded_zones].offset = zone->addr - z->addr;
> > + z->excluded[z->nb_excluded_zones].size = zone->size;
> > + z->nb_excluded_zones++;
> > +
> > + return 0;
> > +}
> > +
> >
>
> The whole thing needs locking against concurrent calls and against users
> of the structure.
>
> >
> > +static int batch_mmio(struct kvm_vcpu *vcpu)
> > +{
> > + struct kvm_batch *batch = vcpu->kvm->arch.batch;
> > + spinlock_t *lock = &vcpu->kvm->arch.batch_lock;
> > + int next;
> >
>
> Not sure a new lock is warranted. I think we can reuse kvm->lock.
>
> > +
> > + /* check if this MMIO can be delayed */
> > +
> > + if (!kvm_is_delayed_mmio(vcpu->kvm,
> > + vcpu->mmio_phys_addr, vcpu->mmio_size))
> > + return 0;
> >
>
> This check is performed without any locks.
>
> > +
> > static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
> > {
> > int r;
> > @@ -2857,6 +3012,11 @@ again:
> > goto again;
> > }
> >
> > + if (!r &&
> > + vcpu->mmio_is_write && kvm_run->exit_reason == KVM_EXIT_MMIO
> > + && !need_resched() && batch_mmio(vcpu))
> > + goto again;
> > +
> >
>
> Better to do this in emulator_write_emulated_onepage(), similar to how
> local mmio is handled.
>
> In fact, we can register batched mmio regions using the existing
> kvm_io_device mechanism.
>
> >
> > +#define KVM_MAX_DELAYED_MMIO_ZONE 10
> > +#define KVM_MAX_EXCLUDED_MMIO_ZONE 10
> >
>
> We'll want more, esp. as they're cheap.
>
> >
> > /*
> > * ioctls for vcpu fds
> > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > index 64ed402..c8f1bdf 100644
> > --- a/virt/kvm/kvm_main.c
> > +++ b/virt/kvm/kvm_main.c
> > @@ -824,6 +824,8 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma,
> > struct vm_fault *vmf)
> > #ifdef CONFIG_X86
> > else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET)
> > page = virt_to_page(vcpu->arch.pio_data);
> > + else if (vmf->pgoff == KVM_MMIO_PAGE_OFFSET)
> > + page = virt_to_page(vcpu->kvm->arch.batch);
> > #endif
> >
>
> I expect this will be interesting for ppc and ia64.
>
> I didn't see a KVM_CAP_ for this.
>
--
------------- [EMAIL PROTECTED] ---------------
"The best way to predict the future is to invent it."
- Alan Kay
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at http://vger.kernel.org/majordomo-info.html