On Tuesday 06 May 2008 03:06:23 Kay, Allen M wrote:
> Kvm kernel changes.
>
> Signed-off-by: Allen M Kay <[EMAIL PROTECTED]>

> --- /dev/null
> +++ b/arch/x86/kvm/vtd.c
> @@ -0,0 +1,183 @@


> +
> +#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
> +
> +struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev);
> +struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu);
> +void iommu_free_domain(struct dmar_domain *domain);
> +int domain_init(struct dmar_domain *domain, int guest_width);
> +int domain_context_mapping(struct dmar_domain *d,
> +                     struct pci_dev *pdev);
> +int domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
> +                     u64 hpa, size_t size, int prot);
> +void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn);
> +struct dmar_domain * find_domain(struct pci_dev *pdev);

Please move these declarations to a .h file and also give them an appropriate 
prefix.

domain_context_mapping is confusing, and since it's an Intel IOMMU-only thing, 
use something like

intel_iommu_domain_context_mapping
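
For example (only a sketch -- the exact header and the final names are of 
course up to you; everything below is illustrative, not a final API):

/* e.g. in include/linux/intel-iommu.h or a new kvm-specific header */
struct dmar_drhd_unit *dmar_find_matched_drhd_unit(struct pci_dev *dev);
struct dmar_domain *intel_iommu_alloc_domain(struct intel_iommu *iommu);
void intel_iommu_free_domain(struct dmar_domain *domain);
int intel_iommu_domain_init(struct dmar_domain *domain, int guest_width);
int intel_iommu_domain_context_mapping(struct dmar_domain *d,
                                       struct pci_dev *pdev);
int intel_iommu_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
                             u64 hpa, size_t size, int prot);
void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn);
struct dmar_domain *intel_iommu_find_domain(struct pci_dev *pdev);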

> +int kvm_iommu_map_pages(struct kvm *kvm,
> +     gfn_t base_gfn, unsigned long npages)
> +{
> +     unsigned long gpa;
> +     struct page *page;
> +     hpa_t hpa;
> +     int j, write;
> +     struct vm_area_struct *vma;
> +
> +     if (!kvm->arch.domain)
> +             return 1;
> +
> +     gpa = base_gfn << PAGE_SHIFT;
> +     page = gfn_to_page(kvm, base_gfn);
> +     hpa = page_to_phys(page);
> +
> +     printk(KERN_DEBUG "kvm_iommu_map_page: gpa = %lx\n", gpa);
> +     printk(KERN_DEBUG "kvm_iommu_map_page: hpa = %llx\n", hpa);
> +     printk(KERN_DEBUG "kvm_iommu_map_page: size = %lx\n",
> +             npages*PAGE_SIZE);
> +
> +     for (j = 0; j < npages; j++) {
> +             gpa +=  PAGE_SIZE;
> +             page = gfn_to_page(kvm, gpa >> PAGE_SHIFT);
> +             hpa = page_to_phys(page);
> +             domain_page_mapping(kvm->arch.domain, gpa, hpa, PAGE_SIZE,
> +                                     DMA_PTE_READ | DMA_PTE_WRITE);
> +             vma = find_vma(current->mm, gpa);
> +             if (!vma)
> +                     return 1;

*

> +             write = (vma->vm_flags & VM_WRITE) != 0;
> +             get_user_pages(current, current->mm, gpa,
> +                             PAGE_SIZE, write, 0, NULL, NULL);

You should call put_page() on each of these user pages when freeing or exiting 
(in kvm_iommu_unmap_guest), else a reference is held on every page and that's 
a lot of memory leaked.

Holding those references also rules out any form of guest swapping, and you 
should drop them so that a balloon driver in the guest can actually return 
pages to the host.
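
Something along these lines would do for the release side (a sketch only; 
pages[] is a hypothetical per-slot array that kvm_iommu_map_pages() would have 
to fill in when it takes the references):

/* sketch: remember each struct page at map time and drop the references
 * when the guest is torn down or the slot goes away */
static void kvm_iommu_put_pages(struct page **pages, unsigned long npages)
{
        unsigned long i;

        for (i = 0; i < npages; i++)
                if (pages[i])
                        put_page(pages[i]);
}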

> +     }
> +     return 0;
> +}
> +EXPORT_SYMBOL_GPL(kvm_iommu_map_pages);
> +
> +static int kvm_iommu_map_memslots(struct kvm *kvm)
> +{
> +     int i, status;
> +     for (i = 0; i < kvm->nmemslots; i++) {
> +             status = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
> +                             kvm->memslots[i].npages);
> +             if (status)
> +                     return status;

*

> +     }
> +     return 0;
> +}
> +
> +int kvm_iommu_map_guest(struct kvm *kvm,
> +     struct kvm_pci_passthrough_dev *pci_pt_dev)
> +{
> +     struct dmar_drhd_unit *drhd;
> +     struct dmar_domain *domain;
> +     struct intel_iommu *iommu;
> +     struct pci_dev *pdev = NULL;
> +
> +     printk(KERN_DEBUG "kvm_iommu_map_guest: host bdf = %x:%x:%x\n",
> +             pci_pt_dev->host.busnr,
> +             PCI_SLOT(pci_pt_dev->host.devfn),
> +             PCI_FUNC(pci_pt_dev->host.devfn));
> +
> +     for_each_pci_dev(pdev) {
> +             if ((pdev->bus->number == pci_pt_dev->host.busnr) &&
> +                     (pdev->devfn == pci_pt_dev->host.devfn))
> +                     goto found;
> +     }

You can use pci_get_device instead of going through the list yourself.
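
Or, since the lookup is by bus/devfn anyway, something like 
pci_get_bus_and_slot() does the search for you (a sketch; note that it takes a 
reference which needs a matching pci_dev_put() on the teardown and error 
paths):

        pdev = pci_get_bus_and_slot(pci_pt_dev->host.busnr,
                                    pci_pt_dev->host.devfn);
        if (!pdev)
                goto not_found;
        pci_pt_dev->pdev = pdev;
        /* ... and pci_dev_put(pdev) later on the unmap/error paths */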

> +     goto not_found;
> +found:
> +     pci_pt_dev->pdev = pdev;
> +
> +     drhd = dmar_find_matched_drhd_unit(pdev);
> +     if (!drhd) {
> +             printk(KERN_ERR "kvm_iommu_map_guest: drhd == NULL\n");
> +             goto not_found;
> +     }
> +
> +     printk(KERN_DEBUG "kvm_iommu_map_guest: reg_base_addr = %llx\n",
> +             drhd->reg_base_addr);
> +
> +     iommu = drhd->iommu;
> +     if (!iommu) {
> +             printk(KERN_ERR "kvm_iommu_map_guest: iommu == NULL\n");
> +             goto not_found;
> +     }
> +     domain = iommu_alloc_domain(iommu);
> +     if (!domain) {
> +             printk(KERN_ERR "kvm_iommu_map_guest: domain == NULL\n");
> +             goto not_found;
> +     }
> +     if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
> +             printk(KERN_ERR "kvm_iommu_map_guest: domain_init() failed\n");
> +             goto not_found;

The memory allocated by iommu_alloc_domain() is leaked in this case.
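
i.e. that error path also needs something like (sketch):

        if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
                printk(KERN_ERR "kvm_iommu_map_guest: domain_init() failed\n");
                iommu_free_domain(domain);
                goto not_found;
        }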

> +     }
> +     kvm->arch.domain = domain;
> +     kvm_iommu_map_memslots(kvm);

*: You don't check kvm_iommu_map_memslots() for failure here (so the error 
returns marked with * above are silently dropped).
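
e.g. (a sketch, including the cleanup that error path then needs):

        kvm->arch.domain = domain;
        if (kvm_iommu_map_memslots(kvm)) {
                /* plus releasing any page refs already taken, as above */
                iommu_free_domain(domain);
                kvm->arch.domain = NULL;
                goto not_found;
        }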

> +     domain_context_mapping(kvm->arch.domain, pdev);
> +     return 0;
> +not_found:
> +     return 1;
> +}
> +EXPORT_SYMBOL_GPL(kvm_iommu_map_guest);
> +
> +int kvm_iommu_unmap_guest(struct kvm *kvm)
> +{
> +     struct dmar_domain *domain;
> +     struct kvm_pci_pt_dev_list *entry;
> +     struct pci_dev *pdev;
> +
> +     list_for_each_entry(entry, &kvm->arch.domain->devices, list) {
> +             printk(KERN_DEBUG "kvm_iommu_unmap_guest: %x:%x:%x\n",
> +                     entry->pt_dev.host.busnr,
> +                     PCI_SLOT(entry->pt_dev.host.devfn),
> +                     PCI_FUNC(entry->pt_dev.host.devfn));
> +
> +             pdev = entry->pt_dev.pdev;
> +
> +             if (pdev == NULL) {
> +                     printk("kvm_iommu_unmap_guest: pdev == NULL\n");
> +                     return 1;
> +             }
> +
> +             /* detach kvm dmar domain */
> +             detach_domain_for_dev(kvm->arch.domain,
> +                             pdev->bus->number, pdev->devfn);
> +
> +             /* now restore back linux iommu domain */
> +             domain = find_domain(pdev);
> +             if (domain)
> +                     domain_context_mapping(domain, pdev);
> +             else
> +                     printk(KERN_DEBUG
> +                             "kvm_iommu_unmap_guest: domain == NULL\n");
> +     }
> +     /* unmap guest memory in vt-d page table */
> +     iommu_free_domain(kvm->arch.domain);
> +     return 0;
> +}
> +EXPORT_SYMBOL_GPL(kvm_iommu_unmap_guest);
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index a97d2e2..a877db2 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -257,6 +257,7 @@ static void kvm_free_pci_passthrough(struct kvm *kvm)
>
>               list_del(&pci_pt_dev->list);
>       }
> +     kvm->arch.domain = NULL;
>  }
>
>  unsigned long segment_base(u16 selector)
> @@ -1846,6 +1847,10 @@ long kvm_arch_vm_ioctl(struct file *filp,
>               if (copy_from_user(&pci_pt_dev, argp, sizeof pci_pt_dev))
>                       goto out;
>
> +             r = kvm_iommu_map_guest(kvm, &pci_pt_dev);
> +             if (r)
> +                     goto out;
> +
>               r = kvm_vm_ioctl_pci_pt_dev(kvm, &pci_pt_dev);
>               if (r)
>                       goto out;

If the kvm_vm_ioctl_pci_pt_dev() ioctl fails, you don't "unmap" the guest (and 
you also leak memory). I suggest you call the map-guest routine after that 
ioctl, since the ioctl also checks whether a similar device has already been 
added and populates the structure. In that case, you should call 
kvm_free_pci_passthrough() when the iommu mapping fails.

In general, you're leaking memory and failing to unmap or free the domain in 
all the error paths.
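
With that reordering, the hunk would look roughly like (sketch):

                r = kvm_vm_ioctl_pci_pt_dev(kvm, &pci_pt_dev);
                if (r)
                        goto out;

                r = kvm_iommu_map_guest(kvm, &pci_pt_dev);
                if (r) {
                        kvm_free_pci_passthrough(kvm);
                        goto out;
                }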

> @@ -4088,6 +4093,8 @@ static void kvm_free_vcpus(struct kvm *kvm)
>
>  void kvm_arch_destroy_vm(struct kvm *kvm)
>  {
> +     if (kvm->arch.domain)
> +             kvm_iommu_unmap_guest(kvm);

This check can be done inside the unmap-guest routine itself, so that 
kvm_iommu_unmap_guest() can be called unconditionally.
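
i.e. (sketch):

int kvm_iommu_unmap_guest(struct kvm *kvm)
{
        /* nothing was ever assigned to this guest */
        if (!kvm->arch.domain)
                return 0;

        /* ... rest of the teardown as in the patch ... */
}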

>       kvm_free_pci_passthrough(kvm);
>       kvm_free_pit(kvm);
>       kfree(kvm->arch.vpic);
> diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
> index 4662d49..70248cb 100644
> --- a/include/asm-x86/kvm_host.h
> +++ b/include/asm-x86/kvm_host.h
> @@ -19,6 +19,8 @@
>  #include <linux/kvm_types.h>
>
>  #include <asm/desc.h>
> +#include <linux/dmar.h>
> +#include <linux/intel-iommu.h>
>
>  #define KVM_MAX_VCPUS 16
>  #define KVM_MEMORY_SLOTS 32
> @@ -318,6 +320,7 @@ struct kvm_arch{
>        */
>       struct list_head active_mmu_pages;
>       struct list_head pci_pt_dev_head;
> +     struct dmar_domain *domain;

Use a descriptive name, like intel_iommu_domain.

>       struct kvm_pic *vpic;
>       struct kvm_ioapic *vioapic;
>       struct kvm_pit *vpit;
> diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h
> index 5f93b78..6202ed1 100644
> --- a/include/asm-x86/kvm_para.h
> +++ b/include/asm-x86/kvm_para.h
> @@ -170,5 +170,6 @@ struct kvm_pci_pt_info {
>  struct kvm_pci_passthrough_dev {
>       struct kvm_pci_pt_info guest;
>       struct kvm_pci_pt_info host;
>       struct pci_dev *pdev;    /* kernel device pointer for host dev */
>  };
>  #endif
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 4e16682..bcfcf78 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -276,6 +276,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
>  int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
>  void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
>
> +int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
> +                        unsigned long npages);
> +int kvm_iommu_map_guest(struct kvm *kvm,
> +                        struct kvm_pci_passthrough_dev *pci_pt_dev);
> +int kvm_iommu_unmap_guest(struct kvm *kvm);
> +

Why can't these be put in asm-x86/kvm_host.h?

>  static inline void kvm_guest_enter(void)
>  {
>       account_system_vtime(current);
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index d3cb4cc..e46614a 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -309,6 +309,9 @@ int __kvm_set_memory_region(struct kvm *kvm,
>       new.npages = npages;
>       new.flags = mem->flags;
>
> +        /* map the pages in iommu page table */

(if one exists)

> +        kvm_iommu_map_pages(kvm, base_gfn, npages);
> +

You should do this only once all the memory is set up properly, i.e. just 
before returning 0.
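
i.e. something like this at the end of __kvm_set_memory_region(), once the new 
slot has been committed (sketch):

        /* map the pages in the iommu page table, if one exists */
        kvm_iommu_map_pages(kvm, base_gfn, npages);

        return 0;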

>       /* Disallow changing a memory slot's size. */
>       r = -EINVAL;
>       if (npages && old.npa

Amit
