From: John Hubbard <jhubb...@nvidia.com>

Add five new /proc/vmstat items to provide visibility
into what get_user_pages() and put_user_page()
are doing.

After booting and running fio (https://github.com/axboe/fio)
a few times on an NVMe device, as a way to get lots of
get_user_pages_fast() calls, the counters look like this:

$ cat /proc/vmstat |grep gup
nr_gup_slow_pages_requested 21319
nr_gup_fast_pages_requested 11533792
nr_gup_fast_page_backoffs 0
nr_gup_page_count_overflows 0
nr_gup_pages_returned 11555104

Interpretation of the above:
   Total gup requests (slow + fast): 11555111
   Total put_user_page calls:        11555104

This shows 7 more calls to get_user_pages() than to
put_user_page(). That may or may not represent a
problem worth investigating.

Normally, those last two numbers should be equal, but a
couple of things may cause them to differ:

1) Inherent race condition in reading /proc/vmstat values.

2) Bugs at any of the get_user_pages*() call sites. Those
sites need to match get_user_pages() and put_user_page() calls.

Signed-off-by: John Hubbard <jhubb...@nvidia.com>
---
 include/linux/mmzone.h |  5 +++++
 mm/gup.c               | 20 ++++++++++++++++++++
 mm/swap.c              |  1 +
 mm/vmstat.c            |  5 +++++
 4 files changed, 31 insertions(+)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 842f9189537b..f20c14958a2b 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -183,6 +183,11 @@ enum node_stat_item {
        NR_DIRTIED,             /* page dirtyings since bootup */
        NR_WRITTEN,             /* page writings since bootup */
        NR_KERNEL_MISC_RECLAIMABLE,     /* reclaimable non-slab kernel pages */
+       NR_GUP_SLOW_PAGES_REQUESTED,    /* via: get_user_pages() */
+       NR_GUP_FAST_PAGES_REQUESTED,    /* via: get_user_pages_fast() */
+       NR_GUP_FAST_PAGE_BACKOFFS,      /* gup_fast() lost to page_mkclean() */
+       NR_GUP_PAGE_COUNT_OVERFLOWS,    /* gup count overflowed: gup() failed */
+       NR_GUP_PAGES_RETURNED,          /* via: put_user_page() */
        NR_VM_NODE_STAT_ITEMS
 };
 
diff --git a/mm/gup.c b/mm/gup.c
index 3291da342f9c..848ee7899831 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -37,6 +37,8 @@ int get_gup_pin_page(struct page *page)
        page = compound_head(page);
 
        if (page_ref_count(page) >= (UINT_MAX - GUP_PIN_COUNTING_BIAS)) {
+               mod_node_page_state(page_pgdat(page),
+                                   NR_GUP_PAGE_COUNT_OVERFLOWS, 1);
                WARN_ONCE(1, "get_user_pages pin count overflowed");
                return -EOVERFLOW;
        }
@@ -184,6 +186,8 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
                        page = ERR_PTR(ret);
                        goto out;
                }
+               mod_node_page_state(page_pgdat(page),
+                                   NR_GUP_SLOW_PAGES_REQUESTED, 1);
        }
        if (flags & FOLL_TOUCH) {
                if ((flags & FOLL_WRITE) &&
@@ -527,6 +531,8 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
        ret = get_gup_pin_page(*page);
        if (ret)
                goto unmap;
+
+       mod_node_page_state(page_pgdat(*page), NR_GUP_SLOW_PAGES_REQUESTED, 1);
 out:
        ret = 0;
 unmap:
@@ -1461,7 +1467,12 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
                if (!page_cache_gup_pin_speculative(head))
                        goto pte_unmap;
 
+               mod_node_page_state(page_pgdat(head),
+                                   NR_GUP_FAST_PAGES_REQUESTED, 1);
+
                if (unlikely(pte_val(pte) != pte_val(*ptep))) {
+                       mod_node_page_state(page_pgdat(head),
+                                           NR_GUP_FAST_PAGE_BACKOFFS, 1);
                        put_user_page(head);
                        goto pte_unmap;
                }
@@ -1522,6 +1533,9 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr,
                        return 0;
                }
 
+               mod_node_page_state(page_pgdat(page),
+                                   NR_GUP_FAST_PAGES_REQUESTED, 1);
+
                (*nr)++;
                pfn++;
        } while (addr += PAGE_SIZE, addr != end);
@@ -1607,6 +1621,8 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
                return 0;
        }
 
+       mod_node_page_state(page_pgdat(head), NR_GUP_FAST_PAGES_REQUESTED, 1);
+
        if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
                *nr -= refs;
                put_user_page(head);
@@ -1644,6 +1660,8 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
                return 0;
        }
 
+       mod_node_page_state(page_pgdat(head), NR_GUP_FAST_PAGES_REQUESTED, 1);
+
        if (unlikely(pud_val(orig) != pud_val(*pudp))) {
                *nr -= refs;
                put_user_page(head);
@@ -1680,6 +1698,8 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
                return 0;
        }
 
+       mod_node_page_state(page_pgdat(head), NR_GUP_FAST_PAGES_REQUESTED, 1);
+
        if (unlikely(pgd_val(orig) != pgd_val(*pgdp))) {
                *nr -= refs;
                put_user_page(head);
diff --git a/mm/swap.c b/mm/swap.c
index 39b0ddd35933..49e192f242d4 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -150,6 +150,7 @@ void put_user_page(struct page *page)
 
        VM_BUG_ON_PAGE(page_ref_count(page) < GUP_PIN_COUNTING_BIAS, page);
 
+       mod_node_page_state(page_pgdat(page), NR_GUP_PAGES_RETURNED, 1);
        page_ref_sub(page, GUP_PIN_COUNTING_BIAS);
 }
 EXPORT_SYMBOL(put_user_page);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 83b30edc2f7f..18a1a4a2dd29 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1164,6 +1164,11 @@ const char * const vmstat_text[] = {
        "nr_dirtied",
        "nr_written",
        "nr_kernel_misc_reclaimable",
+       "nr_gup_slow_pages_requested",
+       "nr_gup_fast_pages_requested",
+       "nr_gup_fast_page_backoffs",
+       "nr_gup_page_count_overflows",
+       "nr_gup_pages_returned",
 
        /* enum writeback_stat_item counters */
        "nr_dirty_threshold",
-- 
2.20.1

Reply via email to