From: Dave Hansen <[email protected]>

7 out of 9 of the page walkers need the VMA and pass it in some
way through mm_walk->private.  Let's add it to the page walker
infrastructure.

This will increase the number of find_vma() calls, but the VMA
cache should help us out pretty nicely here.  If that turns out
to be an issue, it is also easy to optimize: skip the find_vma()
call when 'addr' is still within the current mm_walk->vma (see
the sketch below).
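
For reference, one possible shape of that optimization (not part of
this patch, just a sketch against the walk_update_vma() helper added
below):

static void walk_update_vma(unsigned long addr, unsigned long end,
                            struct mm_walk *walk)
{
        struct vm_area_struct *new_vma;

        /* Fast path: still inside the VMA from the last lookup. */
        if (walk->vma && addr >= walk->vma->vm_start &&
            addr < walk->vma->vm_end)
                return;

        new_vma = find_vma(walk->mm, addr);
        /*
         * find_vma() is not exact and returns the next VMA ending
         * after addr, which may lie entirely beyond the range being
         * walked; clear it in that case.
         */
        if (new_vma && new_vma->vm_start >= end)
                new_vma = NULL;

        walk->vma = new_vma;
}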

/proc/$pid/numa_map:
/proc/$pid/smaps:
        lots of stuff including vma (vma is a drop in the bucket)
        in a struct
/proc/$pid/clear_refs:
        passes vma plus an enum in a struct
/proc/$pid/pagemap:
        does its own find_vma() (in the hugetlb path)
openrisc:
        no VMA
MADV_WILLNEED:
        walk->private is set to vma
cgroup precharge:
        walk->private is set to vma
cgroup move charge:
        walk->private is set to vma
powerpc subpages:
        walk->private is set to vma
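
In each case the conversion is the same basic pattern.  Roughly
(hypothetical walker, names for illustration only):

static int example_pmd_entry(pmd_t *pmd, unsigned long addr,
                             unsigned long end, struct mm_walk *walk)
{
        /* was: struct vm_area_struct *vma = walk->private; */
        struct vm_area_struct *vma = walk->vma;

        /* per-walker work using 'vma' goes here */
        return 0;
}

static void example_walk(struct vm_area_struct *vma)
{
        struct mm_walk walk = {
                .mm        = vma->vm_mm,
                .pmd_entry = example_pmd_entry,
                /* no more '.private = vma' boilerplate */
        };

        walk_page_range(vma->vm_start, vma->vm_end, &walk);
}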

Signed-off-by: Dave Hansen <[email protected]>
---

 b/arch/powerpc/mm/subpage-prot.c |    3 --
 b/fs/proc/task_mmu.c             |   25 ++++++---------------
 b/include/linux/mm.h             |    1 
 b/mm/madvise.c                   |    3 --
 b/mm/memcontrol.c                |    4 +--
 b/mm/pagewalk.c                  |   45 ++++++++++++++++++++++++++++++++++-----
 6 files changed, 52 insertions(+), 29 deletions(-)

diff -puN arch/powerpc/mm/subpage-prot.c~page-walker-pass-vma arch/powerpc/mm/subpage-prot.c
--- a/arch/powerpc/mm/subpage-prot.c~page-walker-pass-vma       2014-06-02 14:20:19.524817706 -0700
+++ b/arch/powerpc/mm/subpage-prot.c    2014-06-02 14:20:19.536818243 -0700
@@ -134,7 +134,7 @@ static void subpage_prot_clear(unsigned
 static int subpage_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
                                  unsigned long end, struct mm_walk *walk)
 {
-       struct vm_area_struct *vma = walk->private;
+       struct vm_area_struct *vma = walk->vma;
        split_huge_page_pmd(vma, addr, pmd);
        return 0;
 }
@@ -163,7 +163,6 @@ static void subpage_mark_vma_nohuge(stru
                if (vma->vm_start >= (addr + len))
                        break;
                vma->vm_flags |= VM_NOHUGEPAGE;
-               subpage_proto_walk.private = vma;
                walk_page_range(vma->vm_start, vma->vm_end,
                                &subpage_proto_walk);
                vma = vma->vm_next;
diff -puN fs/proc/task_mmu.c~page-walker-pass-vma fs/proc/task_mmu.c
--- a/fs/proc/task_mmu.c~page-walker-pass-vma   2014-06-02 14:20:19.526817794 -0700
+++ b/fs/proc/task_mmu.c        2014-06-02 14:20:19.537818287 -0700
@@ -424,7 +424,6 @@ const struct file_operations proc_tid_ma
 
 #ifdef CONFIG_PROC_PAGE_MONITOR
 struct mem_size_stats {
-       struct vm_area_struct *vma;
        unsigned long resident;
        unsigned long shared_clean;
        unsigned long shared_dirty;
@@ -443,7 +442,7 @@ static void smaps_pte_entry(pte_t ptent,
                unsigned long ptent_size, struct mm_walk *walk)
 {
        struct mem_size_stats *mss = walk->private;
-       struct vm_area_struct *vma = mss->vma;
+       struct vm_area_struct *vma = walk->vma;
        pgoff_t pgoff = linear_page_index(vma, addr);
        struct page *page = NULL;
        int mapcount;
@@ -495,7 +494,7 @@ static int smaps_pte_range(pmd_t *pmd, u
                           struct mm_walk *walk)
 {
        struct mem_size_stats *mss = walk->private;
-       struct vm_area_struct *vma = mss->vma;
+       struct vm_area_struct *vma = walk->vma;
        pte_t *pte;
        spinlock_t *ptl;
 
@@ -588,7 +587,6 @@ static int show_smap(struct seq_file *m,
        };
 
        memset(&mss, 0, sizeof mss);
-       mss.vma = vma;
        /* mmap_sem is held in m_start */
        if (vma->vm_mm)
                walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk);
@@ -712,7 +710,6 @@ enum clear_refs_types {
 };
 
 struct clear_refs_private {
-       struct vm_area_struct *vma;
        enum clear_refs_types type;
 };
 
@@ -748,7 +745,7 @@ static int clear_refs_pte_range(pmd_t *p
                                unsigned long end, struct mm_walk *walk)
 {
        struct clear_refs_private *cp = walk->private;
-       struct vm_area_struct *vma = cp->vma;
+       struct vm_area_struct *vma = walk->vma;
        pte_t *pte, ptent;
        spinlock_t *ptl;
        struct page *page;
@@ -828,7 +825,6 @@ static ssize_t clear_refs_write(struct f
                if (type == CLEAR_REFS_SOFT_DIRTY)
                        mmu_notifier_invalidate_range_start(mm, 0, -1);
                for (vma = mm->mmap; vma; vma = vma->vm_next) {
-                       cp.vma = vma;
                        /*
                         * Writing 1 to /proc/pid/clear_refs affects all pages.
                         *
@@ -1073,15 +1069,11 @@ static int pagemap_hugetlb_range(pte_t *
                                 struct mm_walk *walk)
 {
        struct pagemapread *pm = walk->private;
-       struct vm_area_struct *vma;
        int err = 0;
        int flags2;
        pagemap_entry_t pme;
 
-       vma = find_vma(walk->mm, addr);
-       WARN_ON_ONCE(!vma);
-
-       if (vma && (vma->vm_flags & VM_SOFTDIRTY))
+       if (walk->vma && (walk->vma->vm_flags & VM_SOFTDIRTY))
                flags2 = __PM_SOFT_DIRTY;
        else
                flags2 = 0;
@@ -1241,7 +1233,6 @@ const struct file_operations proc_pagema
 #ifdef CONFIG_NUMA
 
 struct numa_maps {
-       struct vm_area_struct *vma;
        unsigned long pages;
        unsigned long anon;
        unsigned long active;
@@ -1317,11 +1308,11 @@ static int gather_pte_stats(pmd_t *pmd,
 
        md = walk->private;
 
-       if (pmd_trans_huge_lock(pmd, md->vma, &ptl) == 1) {
+       if (pmd_trans_huge_lock(pmd, walk->vma, &ptl) == 1) {
                pte_t huge_pte = *(pte_t *)pmd;
                struct page *page;
 
-               page = can_gather_numa_stats(huge_pte, md->vma, addr);
+               page = can_gather_numa_stats(huge_pte, walk->vma, addr);
                if (page)
                        gather_stats(page, md, pte_dirty(huge_pte),
                                     HPAGE_PMD_SIZE/PAGE_SIZE);
@@ -1333,7 +1324,7 @@ static int gather_pte_stats(pmd_t *pmd,
                return 0;
        orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
        do {
-               struct page *page = can_gather_numa_stats(*pte, md->vma, addr);
+               struct page *page = can_gather_numa_stats(*pte, walk->vma, addr);
                if (!page)
                        continue;
                gather_stats(page, md, pte_dirty(*pte), 1);
@@ -1392,8 +1383,6 @@ static int show_numa_map(struct seq_file
        /* Ensure we start with an empty set of numa_maps statistics. */
        memset(md, 0, sizeof(*md));
 
-       md->vma = vma;
-
        walk.hugetlb_entry = gather_hugetbl_stats;
        walk.pmd_entry = gather_pte_stats;
        walk.private = md;
diff -puN include/linux/mm.h~page-walker-pass-vma include/linux/mm.h
--- a/include/linux/mm.h~page-walker-pass-vma   2014-06-02 14:20:19.528817884 -0700
+++ b/include/linux/mm.h        2014-06-02 14:20:19.538818332 -0700
@@ -1118,6 +1118,7 @@ struct mm_walk {
                             unsigned long addr, unsigned long next,
                             struct mm_walk *walk);
        struct mm_struct *mm;
+       struct vm_area_struct *vma;
        void *private;
 };
 
diff -puN mm/madvise.c~page-walker-pass-vma mm/madvise.c
--- a/mm/madvise.c~page-walker-pass-vma 2014-06-02 14:20:19.529817929 -0700
+++ b/mm/madvise.c      2014-06-02 14:20:19.539818378 -0700
@@ -139,7 +139,7 @@ static int swapin_walk_pmd_entry(pmd_t *
        unsigned long end, struct mm_walk *walk)
 {
        pte_t *orig_pte;
-       struct vm_area_struct *vma = walk->private;
+       struct vm_area_struct *vma = walk->vma;
        unsigned long index;
 
        if (pmd_none_or_trans_huge_or_clear_bad(pmd))
@@ -176,7 +176,6 @@ static void force_swapin_readahead(struc
        struct mm_walk walk = {
                .mm = vma->vm_mm,
                .pmd_entry = swapin_walk_pmd_entry,
-               .private = vma,
        };
 
        walk_page_range(start, end, &walk);
diff -puN mm/memcontrol.c~page-walker-pass-vma mm/memcontrol.c
--- a/mm/memcontrol.c~page-walker-pass-vma      2014-06-02 14:20:19.532818064 -0700
+++ b/mm/memcontrol.c   2014-06-02 14:20:19.541818468 -0700
@@ -6786,7 +6786,7 @@ static int mem_cgroup_count_precharge_pt
                                        unsigned long addr, unsigned long end,
                                        struct mm_walk *walk)
 {
-       struct vm_area_struct *vma = walk->private;
+       struct vm_area_struct *vma = walk->vma;
        pte_t *pte;
        spinlock_t *ptl;
 
@@ -6962,7 +6962,7 @@ static int mem_cgroup_move_charge_pte_ra
                                struct mm_walk *walk)
 {
        int ret = 0;
-       struct vm_area_struct *vma = walk->private;
+       struct vm_area_struct *vma = walk->vma;
        pte_t *pte;
        spinlock_t *ptl;
        enum mc_target_type target_type;
diff -puN mm/pagewalk.c~page-walker-pass-vma mm/pagewalk.c
--- a/mm/pagewalk.c~page-walker-pass-vma        2014-06-02 14:20:19.533818109 -0700
+++ b/mm/pagewalk.c     2014-06-02 14:20:19.542818513 -0700
@@ -3,6 +3,38 @@
 #include <linux/sched.h>
 #include <linux/hugetlb.h>
 
+
+/*
+ * The VMA which applies to the current place in the
+ * page walk is tracked in walk->vma.  If there is
+ * no VMA covering the current area (when in a pte_hole)
+ * walk->vma will be NULL.
+ *
+ * If the area being walked is covered by more than one
+ * VMA, then the first one will be set in walk->vma.
+ * Additional VMAs can be found by walking the VMA sibling
+ * list, or by calling this function or find_vma() directly.
+ *
+ * In a situation where the area being walked is not
+ * entirely covered by a VMA, the _first_ VMA which covers
+ * part of the area will be set in walk->vma.
+ */
+static void walk_update_vma(unsigned long addr, unsigned long end,
+                    struct mm_walk *walk)
+{
+       struct vm_area_struct *new_vma = find_vma(walk->mm, addr);
+
+       /*
+        * find_vma() is not exact and returns the next VMA
+        * ending after addr.  The vma we found may be outside
+        * the range which we are walking, so clear it if so.
+        */
+       if (new_vma && new_vma->vm_start >= end)
+               new_vma = NULL;
+
+       walk->vma = new_vma;
+}
+
 static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
                          struct mm_walk *walk)
 {
@@ -15,6 +47,7 @@ static int walk_pte_range(pmd_t *pmd, un
                if (err)
                       break;
                addr += PAGE_SIZE;
+               walk_update_vma(addr, addr + PAGE_SIZE, walk);
                if (addr == end)
                        break;
                pte++;
@@ -35,6 +68,7 @@ static int walk_pmd_range(pud_t *pud, un
        do {
 again:
                next = pmd_addr_end(addr, end);
+               walk_update_vma(addr, next, walk);
                if (pmd_none(*pmd)) {
                        if (walk->pte_hole)
                                err = walk->pte_hole(addr, next, walk);
@@ -79,6 +113,7 @@ static int walk_pud_range(pgd_t *pgd, un
        pud = pud_offset(pgd, addr);
        do {
                next = pud_addr_end(addr, end);
+               walk_update_vma(addr, next, walk);
                if (pud_none_or_clear_bad(pud)) {
                        if (walk->pte_hole)
                                err = walk->pte_hole(addr, next, walk);
@@ -105,10 +140,10 @@ static unsigned long hugetlb_entry_end(s
        return boundary < end ? boundary : end;
 }
 
-static int walk_hugetlb_range(struct vm_area_struct *vma,
-                             unsigned long addr, unsigned long end,
+static int walk_hugetlb_range(unsigned long addr, unsigned long end,
                              struct mm_walk *walk)
 {
+       struct vm_area_struct *vma = walk->vma;
        struct hstate *h = hstate_vma(vma);
        unsigned long next;
        unsigned long hmask = huge_page_mask(h);
@@ -187,14 +222,14 @@ int walk_page_range(unsigned long addr,
                struct vm_area_struct *vma = NULL;
 
                next = pgd_addr_end(addr, end);
-
+               walk_update_vma(addr, next, walk);
                /*
                 * This function was not intended to be vma based.
                 * But there are vma special cases to be handled:
                 * - hugetlb vma's
                 * - VM_PFNMAP vma's
                 */
-               vma = find_vma(walk->mm, addr);
+               vma = walk->vma;
                if (vma && (vma->vm_start <= addr)) {
                        /*
                         * There are no page structures backing a VM_PFNMAP
@@ -219,7 +254,7 @@ int walk_page_range(unsigned long addr,
                                 * so walk through hugetlb entries within a
                                 * given vma.
                                 */
-                               err = walk_hugetlb_range(vma, addr, next, walk);
+                               err = walk_hugetlb_range(addr, next, walk);
                                if (err)
                                        break;
                                pgd = pgd_offset(walk->mm, next);
_