Pinning pages from ZONE_DEVICE memory needs to check the backing device's
liveness, which is tracked in the device's dev_pagemap metadata. This
metadata is stored in a radix tree, and looking it up adds measurable
software overhead.

This patch avoids repeating this relatively costly lookup by caching the
most recently used dev_pagemap while getting user pages, reusing the
cached reference across consecutive pages from the same device. The
gup_benchmark reports this reduces the time to get user pages to as low
as 1/3 of the previous time.

Cc: Kirill Shutemov <kirill.shute...@linux.intel.com>
Cc: Dave Hansen <dave.han...@intel.com>
Cc: Dan Williams <dan.j.willi...@intel.com>
Signed-off-by: Keith Busch <keith.bu...@intel.com>
---
 include/linux/mm.h |  8 +++++++-
 mm/gup.c           | 41 ++++++++++++++++++++++++-----------------
 mm/huge_memory.c   | 35 +++++++++++++++--------------------
 3 files changed, 46 insertions(+), 38 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f1fd241c9071..d688e18a19c4 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -380,6 +380,7 @@ struct vm_fault {
 
 struct follow_page_context {
        struct vm_area_struct *vma;
+       struct dev_pagemap *pgmap;
        unsigned long address;
        unsigned int page_mask;
        unsigned int flags;
@@ -2546,14 +2547,19 @@ struct page *follow_page_mask(struct 
follow_page_context *ctx);
 static inline struct page *follow_page(struct vm_area_struct *vma,
                unsigned long address, unsigned int foll_flags)
 {
+       struct page *page;
        struct follow_page_context ctx = {
                .vma = vma,
+               .pgmap = NULL,
                .address = address,
                .page_mask = 0,
                .flags = foll_flags,
        };
 
-       return follow_page_mask(&ctx);
+       page = follow_page_mask(&ctx);
+       if (ctx.pgmap)
+               put_dev_pagemap(ctx.pgmap);
+       return page;
 }
 
 #define FOLL_WRITE     0x01    /* check pte is writable */
diff --git a/mm/gup.c b/mm/gup.c
index 4c4da54f8dbe..c98ea05eaa59 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -72,7 +72,6 @@ static inline bool can_follow_write_pte(pte_t pte, unsigned 
int flags)
 static struct page *follow_page_pte(struct follow_page_context *ctx, pmd_t 
*pmd)
 {
        struct mm_struct *mm = ctx->vma->vm_mm;
-       struct dev_pagemap *pgmap = NULL;
        struct page *page;
        spinlock_t *ptl;
        pte_t *ptep, pte;
@@ -114,8 +113,8 @@ static struct page *follow_page_pte(struct 
follow_page_context *ctx, pmd_t *pmd)
                 * Only return device mapping pages in the FOLL_GET case since
                 * they are only valid while holding the pgmap reference.
                 */
-               pgmap = get_dev_pagemap(pte_pfn(pte), NULL);
-               if (pgmap)
+               ctx->pgmap = get_dev_pagemap(pte_pfn(pte), ctx->pgmap);
+               if (ctx->pgmap)
                        page = pte_page(pte);
                else
                        goto no_page;
@@ -154,9 +153,9 @@ static struct page *follow_page_pte(struct 
follow_page_context *ctx, pmd_t *pmd)
                get_page(page);
 
                /* drop the pgmap reference now that we hold the page */
-               if (pgmap) {
-                       put_dev_pagemap(pgmap);
-                       pgmap = NULL;
+               if (ctx->pgmap) {
+                       put_dev_pagemap(ctx->pgmap);
+                       ctx->pgmap = NULL;
                }
        }
        if (ctx->flags & FOLL_TOUCH) {
@@ -645,7 +644,7 @@ static long __get_user_pages(struct task_struct *tsk, 
struct mm_struct *mm,
                unsigned int gup_flags, struct page **pages,
                struct vm_area_struct **vmas, int *nonblocking)
 {
-       long i = 0;
+       long ret = 0, i = 0;
        struct vm_area_struct *vma = NULL;
        struct follow_page_context ctx = {};
 
@@ -681,8 +680,10 @@ static long __get_user_pages(struct task_struct *tsk, 
struct mm_struct *mm,
                                goto next_page;
                        }
 
-                       if (!vma || check_vma_flags(vma, gup_flags))
-                               return i ? : -EFAULT;
+                       if (!vma || check_vma_flags(vma, gup_flags)) {
+                               ret = -EFAULT;
+                               goto out;
+                       }
                        if (is_vm_hugetlb_page(vma)) {
                                i = follow_hugetlb_page(mm, vma, pages, vmas,
                                                &start, &nr_pages, i,
@@ -697,23 +698,25 @@ static long __get_user_pages(struct task_struct *tsk, 
struct mm_struct *mm,
                 * If we have a pending SIGKILL, don't keep faulting pages and
                 * potentially allocating memory.
                 */
-               if (unlikely(fatal_signal_pending(current)))
-                       return i ? i : -ERESTARTSYS;
+               if (unlikely(fatal_signal_pending(current))) {
+                       ret = -ERESTARTSYS;
+                       goto out;
+               }
                cond_resched();
 
                page = follow_page_mask(&ctx);
                if (!page) {
-                       int ret;
                        ret = faultin_page(tsk, &ctx, nonblocking);
                        switch (ret) {
                        case 0:
                                goto retry;
+                       case -EBUSY:
+                               ret = 0;
+                               /* FALLTHRU */
                        case -EFAULT:
                        case -ENOMEM:
                        case -EHWPOISON:
-                               return i ? i : ret;
-                       case -EBUSY:
-                               return i;
+                               goto out;
                        case -ENOENT:
                                goto next_page;
                        }
@@ -725,7 +728,8 @@ static long __get_user_pages(struct task_struct *tsk, 
struct mm_struct *mm,
                         */
                        goto next_page;
                } else if (IS_ERR(page)) {
-                       return i ? i : PTR_ERR(page);
+                       ret = PTR_ERR(page);
+                       goto out;
                }
                if (pages) {
                        pages[i] = page;
@@ -745,7 +749,10 @@ static long __get_user_pages(struct task_struct *tsk, 
struct mm_struct *mm,
                start += page_increm * PAGE_SIZE;
                nr_pages -= page_increm;
        } while (nr_pages);
-       return i;
+out:
+       if (ctx.pgmap)
+               put_dev_pagemap(ctx.pgmap);
+       return i ? i : ret;
 }
 
 static bool vma_permits_fault(struct vm_area_struct *vma,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index abd36e6afe2c..6787011385ce 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -851,12 +851,23 @@ static void touch_pmd(struct vm_area_struct *vma, 
unsigned long addr,
                update_mmu_cache_pmd(vma, addr, pmd);
 }
 
+static struct page *pagemap_page(struct follow_page_context *ctx,
+                                unsigned long pfn)
+{
+       struct page *page;
+
+       ctx->pgmap = get_dev_pagemap(pfn, ctx->pgmap);
+       if (!ctx->pgmap)
+               return ERR_PTR(-EFAULT);
+       page = pfn_to_page(pfn);
+       get_page(page);
+       return page;
+}
+
 struct page *follow_devmap_pmd(struct follow_page_context *ctx, pmd_t *pmd)
 {
        unsigned long pfn = pmd_pfn(*pmd);
        struct mm_struct *mm = ctx->vma->vm_mm;
-       struct dev_pagemap *pgmap;
-       struct page *page;
 
        assert_spin_locked(pmd_lockptr(mm, pmd));
 
@@ -885,14 +896,7 @@ struct page *follow_devmap_pmd(struct follow_page_context 
*ctx, pmd_t *pmd)
                return ERR_PTR(-EEXIST);
 
        pfn += (ctx->address & ~PMD_MASK) >> PAGE_SHIFT;
-       pgmap = get_dev_pagemap(pfn, NULL);
-       if (!pgmap)
-               return ERR_PTR(-EFAULT);
-       page = pfn_to_page(pfn);
-       get_page(page);
-       put_dev_pagemap(pgmap);
-
-       return page;
+       return pagemap_page(ctx, pfn);
 }
 
 int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
@@ -1002,8 +1006,6 @@ struct page *follow_devmap_pud(struct follow_page_context 
*ctx, pud_t *pud)
 {
        unsigned long pfn = pud_pfn(*pud);
        struct mm_struct *mm = ctx->vma->vm_mm;
-       struct dev_pagemap *pgmap;
-       struct page *page;
 
        assert_spin_locked(pud_lockptr(mm, pud));
 
@@ -1026,14 +1028,7 @@ struct page *follow_devmap_pud(struct 
follow_page_context *ctx, pud_t *pud)
                return ERR_PTR(-EEXIST);
 
        pfn += (ctx->address & ~PUD_MASK) >> PAGE_SHIFT;
-       pgmap = get_dev_pagemap(pfn, NULL);
-       if (!pgmap)
-               return ERR_PTR(-EFAULT);
-       page = pfn_to_page(pfn);
-       get_page(page);
-       put_dev_pagemap(pgmap);
-
-       return page;
+       return pagemap_page(ctx, pfn);
 }
 
 int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-- 
2.14.4

Reply via email to