[PATCH V4 2/2] vfio: optimized for unpinning pages

2020-09-08 Thread Ming Mao
In unpin_user_pages_dirty_lock() the pages are unpinned one by one.
We add a new API, unpin_user_hugetlb_pages_dirty_lock(), which removes
that per-page loop: when unpinning hugetlb pages, all of the work can
be done by a single operation on the head page.
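
For reference, a minimal before/after sketch of the caller side (not
part of the patch; 'pages', 'npages' and 'writable' are illustrative
names, and the new API's usage is taken from hugetlb_put_pfn() below):

    /* before: the existing API loops over the array internally,
     * unpinning one PAGE_SIZE page at a time
     */
    unpin_user_pages_dirty_lock(pages, npages, writable);

    /* after: one call releases all npages pins of the hugetlb page;
     * per the commit message, the work is done on the head page
     */
    unpin_user_hugetlb_pages_dirty_lock(pages[0], npages, writable);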

Signed-off-by: Ming Mao 
---
 drivers/vfio/vfio_iommu_type1.c | 90 +++-
 include/linux/mm.h  |  3 ++
 mm/gup.c| 91 +
 3 files changed, 172 insertions(+), 12 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 8c1dc5136..44fc5f16c 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -609,6 +609,26 @@ static long hugetlb_page_vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr
return ret;
 }
 
+/*
+ * put pfns for a hugetlb page
+ * @start: the PAGE_SIZE-page we start to put, can be any page in this hugetlb page
+ * @npages: the number of PAGE_SIZE-pages to put
+ * @prot: IOMMU_READ/WRITE
+ */
+static int hugetlb_put_pfn(unsigned long start, unsigned long npages, int prot)
+{
+   struct page *page;
+
+   if (!pfn_valid(start))
+   return -EFAULT;
+
+   page = pfn_to_page(start);
+   if (!page || !PageHuge(page))
+   return -EINVAL;
+
+   return unpin_user_hugetlb_pages_dirty_lock(page, npages, prot & IOMMU_WRITE);
+}
+
 static long vfio_pin_hugetlb_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
  long npage, unsigned long *pfn_base,
  unsigned long limit)
@@ -616,7 +636,7 @@ static long vfio_pin_hugetlb_pages_remote(struct vfio_dma *dma, unsigned long va
unsigned long pfn = 0;
long ret, pinned = 0, lock_acct = 0;
dma_addr_t iova = vaddr - dma->vaddr + dma->iova;
-   long pinned_loop, i;
+   long pinned_loop;
 
/* This code path is only user initiated */
if (!current->mm)
@@ -674,8 +694,7 @@ static long vfio_pin_hugetlb_pages_remote(struct vfio_dma *dma, unsigned long va
 
if (!dma->lock_cap &&
current->mm->locked_vm + lock_acct > limit) {
-   for (i = 0; i < pinned_loop; i++)
-   put_pfn(pfn++, dma->prot);
+   hugetlb_put_pfn(pfn, pinned_loop, dma->prot);
pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
__func__, limit << PAGE_SHIFT);
ret = -ENOMEM;
@@ -695,6 +714,40 @@ static long vfio_pin_hugetlb_pages_remote(struct vfio_dma *dma, unsigned long va
return pinned;
 }
 
+static long vfio_unpin_hugetlb_pages_remote(struct vfio_dma *dma, dma_addr_t iova,
+   unsigned long pfn, long npage,
+   bool do_accounting)
+{
+   long unlocked = 0, locked = 0;
+   long i, unpinned;
+
+   for (i = 0; i < npage; i += unpinned, iova += unpinned * PAGE_SIZE) {
+   if (!is_hugetlb_page(pfn))
+   goto slow_path;
+
+   unpinned = hugetlb_put_pfn(pfn, npage - i, dma->prot);
+   if (unpinned > 0) {
+   pfn += unpinned;
+   unlocked += unpinned;
+   locked += hugetlb_page_get_externally_pinned_num(dma, pfn, unpinned);
+   } else
+   goto slow_path;
+   }
+slow_path:
+   for (; i < npage; i++, iova += PAGE_SIZE) {
+   if (put_pfn(pfn++, dma->prot)) {
+   unlocked++;
+   if (vfio_find_vpfn(dma, iova))
+   locked++;
+   }
+   }
+
+   if (do_accounting)
+   vfio_lock_acct(dma, locked - unlocked, true);
+
+   return unlocked;
+}
+
 /*
  * Attempt to pin pages.  We really don't want to track all the pfns and
  * the iommu can only map chunks of consecutive pfns anyway, so get the
@@ -993,11 +1046,18 @@ static long vfio_sync_unpin(struct vfio_dma *dma, struct vfio_domain *domain,
iommu_tlb_sync(domain->domain, iotlb_gather);
 
list_for_each_entry_safe(entry, next, regions, list) {
-   unlocked += vfio_unpin_pages_remote(dma,
-   entry->iova,
-   entry->phys >> PAGE_SHIFT,
-   entry->len >> PAGE_SHIFT,
-   false);
+   if (is_hugetlb_page(entry->phys >> PAGE_SHIFT))
+   unlocked += vfio_unpin_hugetlb_pages_remote(dma,
+   e

[PATCH V4 1/2] vfio dma_map/unmap: optimized for hugetlbfs pages

2020-09-08 Thread Ming Mao
In the original dma_map/unmap path for VFIO devices, we have to check
the pages one by one to make sure they are contiguous, so dma_map/unmap
can take a long time.  With hugetlb pages we can avoid this problem:
all the PAGE_SIZE pages inside a hugetlb page are contiguous, and a
hugetlb page should not be split, so we can delete the for loops.

Following Peter Xu's suggestion, we should use the API
unpin_user_pages_dirty_lock() to unpin hugetlb pages.  That API still
unpins the pages one by one, so it is worth optimizing as well.  This
patch does not optimize the unpinning process; that is done in the
next patch.
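
The observation reduces to a few lines of arithmetic.  Below is a
standalone sketch of it (it mirrors hugetlb_get_residual_pages() in
the diff that follows; the function name is illustrative, not taken
from the patch):

    /*
     * For a hugetlb page of the given order (e.g. 9 for a 2MB page with
     * 4KB base pages), every PAGE_SIZE page from vaddr up to the end of
     * the hugetlb page is physically contiguous, so the whole run can be
     * mapped without checking each pfn individually.
     */
    static unsigned long residual_pages_sketch(unsigned long vaddr,
                                               unsigned int order)
    {
            unsigned long npage = 1UL << order;  /* PAGE_SIZE pages per hugetlb page */
            unsigned long idx = (vaddr >> PAGE_SHIFT) & (npage - 1);

            return npage - idx;  /* >= 1, includes the page at vaddr itself */
    }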

Signed-off-by: Ming Mao 
---
 drivers/vfio/vfio_iommu_type1.c | 289 +++-
 1 file changed, 281 insertions(+), 8 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 5e556ac91..8c1dc5136 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -479,6 +479,222 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
return ret;
 }
 
+static bool is_hugetlb_page(unsigned long pfn)
+{
+   struct page *page;
+
+   if (!pfn_valid(pfn))
+   return false;
+
+   page = pfn_to_page(pfn);
+   /* only check for hugetlb pages */
+   return page && PageHuge(page);
+}
+
+static bool vaddr_is_hugetlb_page(unsigned long vaddr, int prot)
+{
+   unsigned long pfn;
+   int ret;
+   bool result;
+
+   if (!current->mm)
+   return false;
+
+   ret = vaddr_get_pfn(current->mm, vaddr, prot, &pfn);
+   if (ret)
+   return false;
+
+   result = is_hugetlb_page(pfn);
+
+   put_pfn(pfn, prot);
+
+   return result;
+}
+
+/*
+ * get the number of residual PAGE_SIZE-pages in a hugetlb page
+ * (including the page pointed to by this address)
+ * @address: we count residual pages from this address to the end of
+ * a hugetlb page
+ * @order: the order of this hugetlb page
+ */
+static long
+hugetlb_get_residual_pages(unsigned long address, unsigned int order)
+{
+   unsigned long hugetlb_npage;
+   unsigned long hugetlb_mask;
+
+   if (!order)
+   return -EINVAL;
+
+   hugetlb_npage = 1UL << order;
+   hugetlb_mask = hugetlb_npage - 1;
+   address = address >> PAGE_SHIFT;
+
+   /*
+* Since we count the page pointed to by this address, the number of
+* residual PAGE_SIZE-pages is greater than or equal to 1.
+*/
+   return hugetlb_npage - (address & hugetlb_mask);
+}
+
+static unsigned int
+hugetlb_page_get_externally_pinned_num(struct vfio_dma *dma,
+   unsigned long start,
+   unsigned long npage)
+{
+   struct vfio_pfn *vpfn;
+   struct rb_node *node;
+   unsigned long end;
+   unsigned int num = 0;
+
+   if (!dma || !npage)
+   return 0;
+
+   end = start + npage - 1;
+   /* If we find a page in dma->pfn_list, this page has been pinned externally */
+   for (node = rb_first(&dma->pfn_list); node; node = rb_next(node)) {
+   vpfn = rb_entry(node, struct vfio_pfn, node);
+   if ((vpfn->pfn >= start) && (vpfn->pfn <= end))
+   num++;
+   }
+
+   return num;
+}
+
+static long hugetlb_page_vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
+   int prot, long npage, unsigned long pfn)
+{
+   long hugetlb_residual_npage;
+   struct page *head;
+   int ret = 0;
+   unsigned int contiguous_npage;
+   struct page **pages = NULL;
+   unsigned int flags = 0;
+
+   if ((npage < 0) || !pfn_valid(pfn))
+   return -EINVAL;
+
+   /* all pages are done? */
+   if (!npage)
+   goto out;
+   /*
+* Since pfn is valid,
+* hugetlb_residual_npage is greater than or equal to 1.
+*/
+   head = compound_head(pfn_to_page(pfn));
+   hugetlb_residual_npage = hugetlb_get_residual_pages(vaddr,
+   compound_order(head));
+   /* The page at vaddr has already been pinned by vaddr_get_pfn */
+   contiguous_npage = min_t(long, (hugetlb_residual_npage - 1), npage);
+   /* There is no page left in this hugetlb page. */
+   if (!contiguous_npage)
+   goto out;
+
+   pages = kvmalloc_array(contiguous_npage, sizeof(struct page *), GFP_KERNEL);
+   if (!pages)
+   return -ENOMEM;
+
+   if (prot & IOMMU_WRITE)
+   flags |= FOLL_WRITE;
+
+   mmap_read_lock(mm);
+   /* The number of pages pinned may be less than contiguous_npage */
+   ret = pin_user_pages_remote(NULL, mm, vaddr + PAGE_SIZE, contiguous_npage,
+   flags | FOLL_LONGTERM, pages, NULL, NULL);
+   mmap_r

[PATCH V4 0/2] vfio: optimized for hugetlbf pages when dma map/unmap

2020-09-08 Thread Ming Mao
This series deletes the for loop in dma_map/unmap for hugetlb pages.
In the original process, the for loop can spend a lot of time checking
every normal page.  If we use hugetlb pages, this is not necessary.
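
As a rough illustration (assuming 4KB base pages; these are not
measurements from this series): a 2MB hugetlb page covers 1UL << 9 =
512 PAGE_SIZE pages and a 1GB hugetlb page covers 1UL << 18 = 262144,
so a single batched check or unpin replaces up to that many per-page
iterations.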

Changes from v3
- add a new API unpin_user_hugetlb_pages_dirty_lock()
- use the new API to unpin hugetlb pages

Ming Mao (2):
  vfio dma_map/unmap: optimized for hugetlbfs pages
  vfio: optimized for unpinning pages

 drivers/vfio/vfio_iommu_type1.c | 373 ++--
 include/linux/mm.h  |   3 +
 mm/gup.c|  91 
 3 files changed, 450 insertions(+), 17 deletions(-)

-- 
2.23.0




[PATCH V3] vfio dma_map/unmap: optimized for hugetlbfs pages

2020-08-28 Thread Ming Mao
In the original dma_map/unmap path for VFIO devices, we have to check
the pages one by one to make sure they are contiguous.  As a result,
dma_map/unmap can take a long time.  With hugetlb pages we can avoid
this problem: all the PAGE_SIZE pages inside a hugetlb page are
contiguous, and a hugetlb page should not be split, so we can delete
the for loops.

Signed-off-by: Ming Mao 
---
 drivers/vfio/vfio_iommu_type1.c | 393 +++-
 1 file changed, 382 insertions(+), 11 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 5e556ac91..a689b9698 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -479,6 +479,303 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
return ret;
 }
 
+static bool is_hugetlb_page(unsigned long pfn)
+{
+   struct page *page;
+
+   if (!pfn_valid(pfn))
+   return false;
+
+   page = pfn_to_page(pfn);
+   /* only check for hugetlb pages */
+   return page && PageHuge(page);
+}
+
+static bool vaddr_is_hugetlb_page(unsigned long vaddr, int prot)
+{
+   unsigned long pfn;
+   int ret;
+   bool result;
+
+   if (!current->mm)
+   return false;
+
+   ret = vaddr_get_pfn(current->mm, vaddr, prot, &pfn);
+   if (ret)
+   return false;
+
+   result = is_hugetlb_page(pfn);
+
+   put_pfn(pfn, prot);
+
+   return result;
+}
+
+/*
+ * get the number of residual PAGE_SIZE-pages in a hugetlb page
+ * (including the page pointed to by this address)
+ * @address: we count residual pages from this address to the end of
+ * a hugetlb page
+ * @order: the order of this hugetlb page
+ */
+static long
+hugetlb_get_residual_pages(unsigned long address, unsigned int order)
+{
+   unsigned long hugetlb_npage;
+   unsigned long hugetlb_mask;
+
+   if (!order)
+   return -EINVAL;
+
+   hugetlb_npage = 1UL << order;
+   hugetlb_mask = hugetlb_npage - 1;
+   address = address >> PAGE_SHIFT;
+
+   /*
+* Since we count the page pointed to by this address, the number of
+* residual PAGE_SIZE-pages is greater than or equal to 1.
+*/
+   return hugetlb_npage - (address & hugetlb_mask);
+}
+
+static unsigned int
+hugetlb_page_get_externally_pinned_num(struct vfio_dma *dma,
+   unsigned long start,
+   unsigned long npage)
+{
+   struct vfio_pfn *vpfn;
+   struct rb_node *node;
+   unsigned long end;
+   unsigned int num = 0;
+
+   if (!dma || !npage)
+   return 0;
+
+   end = start + npage - 1;
+   /* If we find a page in dma->pfn_list, this page has been pinned externally */
+   for (node = rb_first(&dma->pfn_list); node; node = rb_next(node)) {
+   vpfn = rb_entry(node, struct vfio_pfn, node);
+   if ((vpfn->pfn >= start) && (vpfn->pfn <= end))
+   num++;
+   }
+
+   return num;
+}
+
+static long hugetlb_page_vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
+   int prot, long npage, unsigned long pfn)
+{
+   long hugetlb_residual_npage;
+   struct page *head;
+   int ret = 0;
+   unsigned int contiguous_npage;
+   struct page **pages = NULL;
+   unsigned int flags = 0;
+
+   if ((npage < 0) || !pfn_valid(pfn))
+   return -EINVAL;
+
+   /* all pages are done? */
+   if (!npage)
+   goto out;
+   /*
+* Since pfn is valid,
+* hugetlb_residual_npage is greater than or equal to 1.
+*/
+   head = compound_head(pfn_to_page(pfn));
+   hugetlb_residual_npage = hugetlb_get_residual_pages(vaddr,
+   compound_order(head));
+   /* The page at vaddr has already been pinned by vaddr_get_pfn */
+   contiguous_npage = min_t(long, (hugetlb_residual_npage - 1), npage);
+   /* There is no page left in this hugetlb page. */
+   if (!contiguous_npage)
+   goto out;
+
+   pages = kvmalloc_array(contiguous_npage, sizeof(struct page *), GFP_KERNEL);
+   if (!pages)
+   return -ENOMEM;
+
+   if (prot & IOMMU_WRITE)
+   flags |= FOLL_WRITE;
+
+   mmap_read_lock(mm);
+   /* The number of pages pinned may be less than contiguous_npage */
+   ret = pin_user_pages_remote(NULL, mm, vaddr + PAGE_SIZE, contiguous_npage,
+   flags | FOLL_LONGTERM, pages, NULL, NULL);
+   mmap_read_unlock(mm);
+out:
+   if (pages)
+   kvfree(pages);
+   return ret;
+}
+
+/*
+ * put pfns for a hugetlb page
+ * @start: the PAGE_SIZE-page we start to put, can be any page in this hugetlb page
+ * @npage: the number of PAGE_SIZE-pages to put
+ * @prot: IOMMU_READ/WRITE
+ *

[PATCH V2] vfio dma_map/unmap: optimized for hugetlbfs pages

2020-08-13 Thread Ming Mao
In the original process of pinning/unpinning pages for VFIO devices,
we have to check the pages one by one to make sure they are contiguous.
As a result, dma_map/unmap can take a long time.  With hugetlb pages we
can avoid this problem: all the PAGE_SIZE pages inside a hugetlb page
are contiguous, and a hugetlb page should not be split, so we can
delete the for loops and use a few operations (such as atomic_add and
page_ref_add) instead.

Signed-off-by: Ming Mao 
---
 drivers/vfio/vfio_iommu_type1.c | 233 +++-
 1 file changed, 230 insertions(+), 3 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 5e556ac91..8957013c1 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -415,6 +415,46 @@ static int put_pfn(unsigned long pfn, int prot)
return 0;
 }
 
+/*
+ * put pfns for a hugetlb page
+ * @start: the PAGE_SIZE-page we start to put, can be any page in this hugetlb page
+ * @npage: the number of PAGE_SIZE-pages to put
+ * @prot: IOMMU_READ/WRITE
+ */
+static int hugetlb_put_pfn(unsigned long start, unsigned int npage, int prot)
+{
+   struct page *page;
+   struct page *head;
+
+   if (!npage || !pfn_valid(start))
+   return 0;
+
+   page = pfn_to_page(start);
+   if (!page || !PageHuge(page))
+   return 0;
+   head = compound_head(page);
+   /*
+* The last page should be in this hugetlb page.
+* The number of pages being put should be equal to the number of
+* pages that were got, so the hugetlb pinned refcount and the normal
+* page refcount cannot be smaller than npage.
+*/
+   if ((head != compound_head(pfn_to_page(start + npage - 1)))
+   || (page_ref_count(head) < npage)
+   || (compound_pincount(page) < npage))
+   return 0;
+
+   if ((prot & IOMMU_WRITE) && !PageDirty(page))
+   set_page_dirty_lock(page);
+
+   atomic_sub(npage, compound_pincount_ptr(head));
+   if (page_ref_sub_and_test(head, npage))
+   __put_page(head);
+
+   mod_node_page_state(page_pgdat(head), NR_FOLL_PIN_RELEASED, npage);
+   return 1;
+}
+
 static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
unsigned long vaddr, unsigned long *pfn,
bool write_fault)
@@ -479,6 +519,105 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
return ret;
 }
 
+static bool is_hugetlbpage(unsigned long pfn)
+{
+   struct page *page;
+
+   if (!pfn_valid(pfn))
+   return false;
+
+   page = pfn_to_page(pfn);
+   /* only check for hugetlb pages */
+   if (!page || !PageHuge(page))
+   return false;
+
+   return true;
+}
+
+/*
+ * get the number of residual PAGE_SIZE-pages in a hugetlb page
+ * (including the page pointed to by this address)
+ * @address: we count residual pages from this address to the end of
+ * a hugetlb page
+ * @order: the order of this hugetlb page
+ */
+static long
+hugetlb_get_residual_pages(unsigned long address, unsigned int order)
+{
+   unsigned long hugetlb_npage;
+   unsigned long hugetlb_mask;
+
+   if (!order)
+   return -1;
+
+   hugetlb_npage = _AC(1, UL) << order;
+   hugetlb_mask = (hugetlb_npage << PAGE_SHIFT) - 1;
+   address = ALIGN_DOWN(address, PAGE_SIZE);
+
+   /*
+* Since we count the page pointed to by this address, the number of
+* residual PAGE_SIZE-pages is greater than or equal to 1.
+*/
+   return hugetlb_npage - ((address & hugetlb_mask) >> PAGE_SHIFT);
+}
+
+static unsigned int
+hugetlb_page_get_externally_pinned_num(struct vfio_dma *dma,
+   unsigned long start,
+   unsigned long npage)
+{
+   struct vfio_pfn *vpfn;
+   struct rb_node *node;
+   unsigned long end = start + npage - 1;
+   unsigned int num = 0;
+
+   if (!dma || !npage)
+   return 0;
+
+   /* If we find a page in dma->pfn_list, this page has been pinned externally */
+   for (node = rb_first(&dma->pfn_list); node; node = rb_next(node)) {
+   vpfn = rb_entry(node, struct vfio_pfn, node);
+   if ((vpfn->pfn >= start) && (vpfn->pfn <= end))
+   num++;
+   }
+
+   return num;
+}
+
+static long hugetlb_page_vaddr_get_pfn(unsigned long vaddr, long npage,
+   unsigned long pfn)
+{
+   long hugetlb_residual_npage;
+   long contiguous_npage;
+   struct page *head = compound_head(pfn_to_page(pfn));
+
+   /*
+* If pfn is valid,
+* hugetlb_residual_npage is greater than or equal to 1.
+*/
+   hugetlb_residual_npage = hugetlb_get_residual_pages(vaddr,
+