Thank you for your comments.

I agree with all.

I'll move my code to emulator_write_emulated_onepage().
I'll use the kvm_io_device mechanism: is there an existing ioctl() to
register MMIO zones?
I'll add a KVM_CAP_

Regards,
Laurent

Le dimanche 18 mai 2008 à 10:39 +0300, Avi Kivity a écrit :
> [resend to new list]
> 
> Laurent Vivier wrote:
> > This patch is the kernel part of the "batch writes to MMIO" patch.
> >
> > It introduces the ioctl interface to define MMIO zones it is allowed to delay.
> > Inside a zone, we can define sub-part we must not delay.
> >
> > If an MMIO can be delayed, it is stored in a ring buffer which is common for 
> > all VCPUs.
> >
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index dab3d4f..930986b 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -1518,6 +1518,103 @@ out:
> >     return r;
> >  }
> >  
> > +static struct kvm_delayed_mmio_zone *kvm_mmio_find_zone(struct kvm *kvm,
> > +                                                   u64 addr, u32 size)
> >   
> 
> gpa_t addr
> 
> > +{
> > +   int i;
> > +   struct kvm_delayed_mmio_zone *zone;
> > +
> > +   for (i = 0; i < kvm->arch.nb_mmio_zones; i++) {
> > +           zone = &kvm->arch.mmio_zone[i];
> > +
> > +           /* (addr,size) is fully included in
> > +            * (zone->addr, zone->size)
> > +            */
> > +
> > +           if (zone->addr <= addr &&
> > +               addr + size <= zone->addr + zone->size)
> > +                   return zone;
> > +   }
> > +   return NULL;
> > +}
> > +
> > +static struct kvm_excluded_mmio_zone *
> > +kvm_mmio_find_excluded(struct kvm_delayed_mmio_zone *zone, u64 addr, u32 
> > size)
> > +{
> > +   static struct kvm_excluded_mmio_zone *excluded;
> > +   int i;
> > +
> > +   addr -= zone->addr;
> > +   for (i = 0; i < zone->nb_excluded_zones; i++) {
> > +           excluded = &zone->excluded[i];
> > +
> > +           if ((excluded->offset <= addr &&
> > +                addr < excluded->offset + excluded->size) ||
> > +                (excluded->offset < addr + size &&
> > +                 addr + size <= excluded->offset +
> > +                               excluded->size))
> > +                   return excluded;
> > +   }
> > +   return NULL;
> > +}
> > +
> >   
> 
> Instead of included/excluded zones (with the default excluded), I
> suggest having just included zones (with the default excluded).  If the
> user specifies an excluded zone within an included zone, simply split
> the included zone into two:
> 
>   [100, 200) + ^[150, 160) = [100, 150) + [160, 200)
> 
> Or maybe, push all this logic to userspace and only allow adding include
> zones (and clearing previously set include zones).
> 
> > +static int kvm_vm_ioctl_set_mmio(struct kvm *kvm,
> > +                            struct kvm_mmio_zone *zone)
> >   
> 
> "mmio" is too generic a name.  Please use delayed mmio or batched mmio
> consistently.
> 
> > +{
> > +   struct kvm_delayed_mmio_zone *z;
> > +
> > +   if (zone->is_delayed &&
> > +       kvm->arch.nb_mmio_zones >= KVM_MAX_DELAYED_MMIO_ZONE)
> > +           return -ENOMEM;
> >   
> 
> -ENOBUFS.  -ENOMEM is generally not recoverable, ENOBUFS means just this
> specific operation failed.
> 
> > +
> > +   if (zone->is_delayed) {
> > +
> > +           /* already defined ? */
> > +
> > +           if (kvm_mmio_find_zone(kvm, zone->addr, 1) ||
> > +               kvm_mmio_find_zone(kvm, zone->addr + zone->size - 1, 1))
> > +                   return 0;
> > +
> > +           z = &kvm->arch.mmio_zone[kvm->arch.nb_mmio_zones];
> > +           z->addr = zone->addr;
> > +           z->size = zone->size;
> > +           kvm->arch.nb_mmio_zones++;
> > +           return 0;
> > +   }
> > +
> > +   /* exclude some parts of the delayed MMIO zone */
> > +
> > +   z = kvm_mmio_find_zone(kvm, zone->addr, zone->size);
> > +   if (z == NULL)
> > +           return -EINVAL;
> > +
> > +   if (z->nb_excluded_zones >= KVM_MAX_EXCLUDED_MMIO_ZONE)
> > +           return -ENOMEM;
> > +
> > +   if (kvm_mmio_find_excluded(z, zone->addr, 1) ||
> > +       kvm_mmio_find_excluded(z, zone->addr + zone->size - 1, 1))
> > +           return 0;
> > +
> > +   z->excluded[z->nb_excluded_zones].offset = zone->addr - z->addr;
> > +   z->excluded[z->nb_excluded_zones].size = zone->size;
> > +   z->nb_excluded_zones++;
> > +
> > +   return 0;
> > +}
> > +
> >   
> 
> The whole thing needs locking against concurrent calls and against users
> of the structure.
> 
> >  
> > +static int batch_mmio(struct kvm_vcpu *vcpu)
> > +{
> > +   struct kvm_batch *batch = vcpu->kvm->arch.batch;
> > +   spinlock_t *lock = &vcpu->kvm->arch.batch_lock;
> > +   int next;
> >   
> 
> Not sure a new lock is warranted. I think we can reuse kvm->lock.
> 
> > +
> > +   /* check if this MMIO can be delayed */
> > +
> > +   if (!kvm_is_delayed_mmio(vcpu->kvm,
> > +                            vcpu->mmio_phys_addr, vcpu->mmio_size))
> > +           return 0;
> >   
> 
> This check is performed without any locks.
> 
> > +
> >  static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
> >  {
> >     int r;
> > @@ -2857,6 +3012,11 @@ again:
> >                     goto again;
> >     }
> >  
> > +   if (!r &&
> > +       vcpu->mmio_is_write && kvm_run->exit_reason == KVM_EXIT_MMIO
> > +       && !need_resched() && batch_mmio(vcpu))
> > +           goto again;
> > +
> >   
> 
> Better to do this in emulator_write_emulated_onepage(), similar to how
> local mmio is handled.
> 
> In fact, we can register batched mmio regions using the existing
> kvm_io_device mechanism.
> 
> >  
> > +#define KVM_MAX_DELAYED_MMIO_ZONE 10
> > +#define KVM_MAX_EXCLUDED_MMIO_ZONE 10
> >   
> 
> We'll want more, esp. as they're cheap.
> 
> >  
> >  /*
> >   * ioctls for vcpu fds
> > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > index 64ed402..c8f1bdf 100644
> > --- a/virt/kvm/kvm_main.c
> > +++ b/virt/kvm/kvm_main.c
> > @@ -824,6 +824,8 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma, 
> > struct vm_fault *vmf)
> >  #ifdef CONFIG_X86
> >     else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET)
> >             page = virt_to_page(vcpu->arch.pio_data);
> > +   else if (vmf->pgoff == KVM_MMIO_PAGE_OFFSET)
> > +           page = virt_to_page(vcpu->kvm->arch.batch);
> >  #endif
> >   
> 
> I expect this will be interesting for ppc and ia64.
> 
> I didn't see a KVM_CAP_ for this.
> 
-- 
------------- [EMAIL PROTECTED] ---------------
"The best way to predict the future is to invent it."
- Alan Kay

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to