On Tuesday 06 May 2008 03:06:23 Kay, Allen M wrote: > Kvm kernel changes. > > Signed-off-by: Allen M Kay <[EMAIL PROTECTED]>
> --- /dev/null > +++ b/arch/x86/kvm/vtd.c > @@ -0,0 +1,183 @@ > + > +#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 > + > +struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev > *dev); > +struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu); > +void iommu_free_domain(struct dmar_domain *domain); > +int domain_init(struct dmar_domain *domain, int guest_width); > +int domain_context_mapping(struct dmar_domain *d, > + struct pci_dev *pdev); > +int domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, > + u64 hpa, size_t size, int prot); > +void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 > devfn); > +struct dmar_domain * find_domain(struct pci_dev *pdev); Please move these to a .h file and also prefix appropriate keywords: domain_context_mapping is confusing and since it's an intel iommu-only thing, use something like intel_iommu_domain_context_mapping > +int kvm_iommu_map_pages(struct kvm *kvm, > + gfn_t base_gfn, unsigned long npages) > +{ > + unsigned long gpa; > + struct page *page; > + hpa_t hpa; > + int j, write; > + struct vm_area_struct *vma; > + > + if (!kvm->arch.domain) > + return 1; > + > + gpa = base_gfn << PAGE_SHIFT; > + page = gfn_to_page(kvm, base_gfn); > + hpa = page_to_phys(page); > + > + printk(KERN_DEBUG "kvm_iommu_map_page: gpa = %lx\n", gpa); > + printk(KERN_DEBUG "kvm_iommu_map_page: hpa = %llx\n", hpa); > + printk(KERN_DEBUG "kvm_iommu_map_page: size = %lx\n", > + npages*PAGE_SIZE); > + > + for (j = 0; j < npages; j++) { > + gpa += PAGE_SIZE; > + page = gfn_to_page(kvm, gpa >> PAGE_SHIFT); > + hpa = page_to_phys(page); > + domain_page_mapping(kvm->arch.domain, gpa, hpa, > PAGE_SIZE, > + DMA_PTE_READ | DMA_PTE_WRITE); > + vma = find_vma(current->mm, gpa); > + if (!vma) > + return 1; * > + write = (vma->vm_flags & VM_WRITE) != 0; > + get_user_pages(current, current->mm, gpa, > + PAGE_SIZE, write, 0, NULL, NULL); You should put_page each of the user pages when freeing or exiting (in unmap_guest), else a ref is held on each page and that's a lot of memory leaked. Also, this rules out any form of guest swapping. You should put_page in case a balloon driver in the guest tries to free some pages for the host. > + } > + return 0; > +} > +EXPORT_SYMBOL_GPL(kvm_iommu_map_pages); > + > +static int kvm_iommu_map_memslots(struct kvm *kvm) > +{ > + int i, status; > + for (i = 0; i < kvm->nmemslots; i++) { > + status = kvm_iommu_map_pages(kvm, > kvm->memslots[i].base_gfn, > + kvm->memslots[i].npages); > + if (status) > + return status; * > + } > + return 0; > +} > + > +int kvm_iommu_map_guest(struct kvm *kvm, > + struct kvm_pci_passthrough_dev *pci_pt_dev) > +{ > + struct dmar_drhd_unit *drhd; > + struct dmar_domain *domain; > + struct intel_iommu *iommu; > + struct pci_dev *pdev = NULL; > + > + printk(KERN_DEBUG "kvm_iommu_map_guest: host bdf = %x:%x:%x\n", > + pci_pt_dev->host.busnr, > + PCI_SLOT(pci_pt_dev->host.devfn), > + PCI_FUNC(pci_pt_dev->host.devfn)); > + > + for_each_pci_dev(pdev) { > + if ((pdev->bus->number == pci_pt_dev->host.busnr) && > + (pdev->devfn == pci_pt_dev->host.devfn)) > + goto found; > + } You can use pci_get_device instead of going through the list yourself. > + goto not_found; > +found: > + pci_pt_dev->pdev = pdev; > + > + drhd = dmar_find_matched_drhd_unit(pdev); > + if (!drhd) { > + printk(KERN_ERR "kvm_iommu_map_guest: drhd == NULL\n"); > + goto not_found; > + } > + > + printk(KERN_DEBUG "kvm_iommu_map_guest: reg_base_addr = %llx\n", > + drhd->reg_base_addr); > + > + iommu = drhd->iommu; > + if (!iommu) { > + printk(KERN_ERR "kvm_iommu_map_guest: iommu == NULL\n"); > + goto not_found; > + } > + domain = iommu_alloc_domain(iommu); > + if (!domain) { > + printk(KERN_ERR "kvm_iommu_map_guest: domain == > NULL\n"); > + goto not_found; > + } > + if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { > + printk(KERN_ERR "kvm_iommu_map_guest: domain_init() > failed\n"); > + goto not_found; Memory allocated in iommu_alloc_domain is leaked in this case > + } > + kvm->arch.domain = domain; > + kvm_iommu_map_memslots(kvm); *: You don't check for failure in mapping > + domain_context_mapping(kvm->arch.domain, pdev); > + return 0; > +not_found: > + return 1; > +} > +EXPORT_SYMBOL_GPL(kvm_iommu_map_guest); > + > +int kvm_iommu_unmap_guest(struct kvm *kvm) > +{ > + struct dmar_domain *domain; > + struct kvm_pci_pt_dev_list *entry; > + struct pci_dev *pdev; > + > + list_for_each_entry(entry, &kvm->arch.domain->devices, list) { > + printk(KERN_DEBUG "kvm_iommu_unmap_guest: %x:%x:%x\n", > + entry->pt_dev.host.busnr, > + PCI_SLOT(entry->pt_dev.host.devfn), > + PCI_FUNC(entry->pt_dev.host.devfn)); > + > + pdev = entry->pt_dev.pdev; > + > + if (pdev == NULL) { > + printk("kvm_iommu_unmap_guest: pdev == NULL\n"); > + return 1; > + } > + > + /* detach kvm dmar domain */ > + detach_domain_for_dev(kvm->arch.domain, > + pdev->bus->number, pdev->devfn); > + > + /* now restore back linux iommu domain */ > + domain = find_domain(pdev); > + if (domain) > + domain_context_mapping(domain, pdev); > + else > + printk(KERN_DEBUG > + "kvm_iommu_unmap_guest: domain == > NULL\n"); > + } > + /* unmap guest memory in vt-d page table */ > + iommu_free_domain(kvm->arch.domain); > + return 0; > +} > +EXPORT_SYMBOL_GPL(kvm_iommu_unmap_guest); > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index a97d2e2..a877db2 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -257,6 +257,7 @@ static void kvm_free_pci_passthrough(struct kvm > *kvm) > > list_del(&pci_pt_dev->list); > } > + kvm->arch.domain = NULL; > } > > unsigned long segment_base(u16 selector) > @@ -1846,6 +1847,10 @@ long kvm_arch_vm_ioctl(struct file *filp, > if (copy_from_user(&pci_pt_dev, argp, sizeof > pci_pt_dev)) > goto out; > > + r = kvm_iommu_map_guest(kvm, &pci_pt_dev); > + if (r) > + goto out; > + > r = kvm_vm_ioctl_pci_pt_dev(kvm, &pci_pt_dev); > if (r) > goto out; If the ioctl fails, you don't "unmap" the guest (and also leak memory). I suggest you call the map guest routine after the ioctl since the ioctl also checks if a similar device has already been added and populates the structure. In that case, you should call kvm_free_pci_passthrough() on unsuccessful iommu mapping Looks like you're leaking memory and failing to unmap or free the domain in all the error paths. > @@ -4088,6 +4093,8 @@ static void kvm_free_vcpus(struct kvm *kvm) > > void kvm_arch_destroy_vm(struct kvm *kvm) > { > + if (kvm->arch.domain) > + kvm_iommu_unmap_guest(kvm); This condition can be checked for inside the unmap guest routine. iommu_unmap_guest can be called unconditionally. > kvm_free_pci_passthrough(kvm); > kvm_free_pit(kvm); > kfree(kvm->arch.vpic); > diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h > index 4662d49..70248cb 100644 > --- a/include/asm-x86/kvm_host.h > +++ b/include/asm-x86/kvm_host.h > @@ -19,6 +19,8 @@ > #include <linux/kvm_types.h> > > #include <asm/desc.h> > +#include <linux/dmar.h> > +#include <linux/intel-iommu.h> > > #define KVM_MAX_VCPUS 16 > #define KVM_MEMORY_SLOTS 32 > @@ -318,6 +320,7 @@ struct kvm_arch{ > */ > struct list_head active_mmu_pages; > struct list_head pci_pt_dev_head; > + struct dmar_domain *domain; Use a descriptive name, like intel_iommu_domain. > struct kvm_pic *vpic; > struct kvm_ioapic *vioapic; > struct kvm_pit *vpit; > diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h > index 5f93b78..6202ed1 100644 > --- a/include/asm-x86/kvm_para.h > +++ b/include/asm-x86/kvm_para.h > @@ -170,5 +170,6 @@ struct kvm_pci_pt_info { > struct kvm_pci_passthrough_dev { > struct kvm_pci_pt_info guest; > struct kvm_pci_pt_info host; > + struct pci_dev *pdev; /* kernel device pointer for host dev > */ > }; > #endif > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index 4e16682..bcfcf78 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -276,6 +276,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v); > int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); > void kvm_vcpu_kick(struct kvm_vcpu *vcpu); > > +int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, > + unsigned long npages); > +int kvm_iommu_map_guest(struct kvm *kvm, > + struct kvm_pci_passthrough_dev *pci_pt_dev); > +int kvm_iommu_unmap_guest(struct kvm *kvm); > + Why can't these be put in asm-x86/kvm_host.h? > static inline void kvm_guest_enter(void) > { > account_system_vtime(current); > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > index d3cb4cc..e46614a 100644 > --- a/virt/kvm/kvm_main.c > +++ b/virt/kvm/kvm_main.c > @@ -309,6 +309,9 @@ int __kvm_set_memory_region(struct kvm *kvm, > new.npages = npages; > new.flags = mem->flags; > > + /* map the pages in iommu page table */ (if one exists) > + kvm_iommu_map_pages(kvm, base_gfn, npages); > + You should do this once all the memory is set up properly (which is just before returning 0) > /* Disallow changing a memory slot's size. */ > r = -EINVAL; > if (npages && old.npa Amit ------------------------------------------------------------------------- This SF.net email is sponsored by the 2008 JavaOne(SM) Conference Don't miss this year's exciting event. There's still time to save $100. Use priority code J8TL2D2. http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone _______________________________________________ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel