From: Dan Williams <dan.j.willi...@intel.com>

When device-dax is operating in huge-page mode, we want it to behave like
hugetlbfs and report the MMU page mapping size that is being enforced by
the vma. Similar to commit 31383c6865a5 ("mm, hugetlbfs: introduce
->split() to vm_operations_struct"), it would be messy to teach
vma_mmu_pagesize() about device-dax page mapping sizes in the same
(hstate) way that hugetlbfs communicates this attribute.  Instead, these
patches introduce a new ->pagesize() vm operation.

Cc: Benjamin Herrenschmidt <b...@kernel.crashing.org>
Cc: Paul Mackerras <pau...@samba.org>
Cc: Michael Ellerman <m...@ellerman.id.au>
Reported-by: Jane Chu <jane....@oracle.com>
Signed-off-by: Dan Williams <dan.j.willi...@intel.com>
Signed-off-by: Dave Jiang <dave.ji...@intel.com>
---
 arch/powerpc/mm/hugetlbpage.c |    5 +----
 include/linux/mm.h            |    1 +
 mm/hugetlb.c                  |   23 ++++++++++++-----------
 3 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index a9b9083c5e49..c6a2e577e842 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -568,10 +568,7 @@ unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
        if (!radix_enabled())
                return 1UL << mmu_psize_to_shift(psize);
 #endif
-       if (!is_vm_hugetlb_page(vma))
-               return PAGE_SIZE;
-
-       return huge_page_size(hstate_vma(vma));
+       return vma_kernel_pagesize(vma);
 }
 
 static inline bool is_power_of_4(unsigned long x)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index ea818ff739cd..37b9aef91ec7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -383,6 +383,7 @@ struct vm_operations_struct {
        int (*huge_fault)(struct vm_fault *vmf, enum page_entry_size pe_size);
        void (*map_pages)(struct vm_fault *vmf,
                        pgoff_t start_pgoff, pgoff_t end_pgoff);
+       unsigned long (*pagesize)(struct vm_area_struct * area);
 
        /* notification that a previously read-only page is about to become
         * writable, if an error is returned it will cause a SIGBUS */
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 9a334f5fb730..8fa069b5cb4d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -637,14 +637,9 @@ EXPORT_SYMBOL_GPL(linear_hugepage_index);
  */
 unsigned long vma_kernel_pagesize(struct vm_area_struct *vma)
 {
-       struct hstate *hstate;
-
-       if (!is_vm_hugetlb_page(vma))
-               return PAGE_SIZE;
-
-       hstate = hstate_vma(vma);
-
-       return 1UL << huge_page_shift(hstate);
+       if (vma->vm_ops && vma->vm_ops->pagesize)
+               return vma->vm_ops->pagesize(vma);
+       return PAGE_SIZE;
 }
 EXPORT_SYMBOL_GPL(vma_kernel_pagesize);
 
@@ -654,12 +649,10 @@ EXPORT_SYMBOL_GPL(vma_kernel_pagesize);
  * architectures where it differs, an architecture-specific version of this
  * function is required.
  */
-#ifndef vma_mmu_pagesize
-unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
+__weak unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
 {
        return vma_kernel_pagesize(vma);
 }
-#endif
 
 /*
  * Flags for MAP_PRIVATE reservations.  These are stored in the bottom
@@ -3132,6 +3125,13 @@ static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr)
        return 0;
 }
 
+static unsigned long hugetlb_vm_op_pagesize(struct vm_area_struct *vma)
+{
+       struct hstate *hstate = hstate_vma(vma);
+
+       return 1UL << huge_page_shift(hstate);
+}
+
 /*
  * We cannot handle pagefaults against hugetlb pages at all.  They cause
  * handle_mm_fault() to try to instantiate regular-sized pages in the
@@ -3149,6 +3149,7 @@ const struct vm_operations_struct hugetlb_vm_ops = {
        .open = hugetlb_vm_op_open,
        .close = hugetlb_vm_op_close,
        .split = hugetlb_vm_op_split,
+       .pagesize = hugetlb_vm_op_pagesize,
 };
 
 static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page,

_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm

Reply via email to