Use the newly added compound devmap facility which maps the assigned dax
ranges as compound pages at a page size of @align.

dax devices are created with a fixed @align (huge page size) which is
enforced through as well at mmap() of the device. Faults, consequently
happen too at the specified @align specified at the creation, and those
don't change throughout dax device lifetime. MCEs unmap a whole dax
huge page, as well as splits occurring at the configured page size.

Performance measured by gup_test improves considerably for
unpin_user_pages() and altmap with NVDIMMs:

$ gup_test -f /dev/dax1.0 -m 16384 -r 10 -S -a -n 512 -w
(pin_user_pages_fast 2M pages) put:~71 ms -> put:~22 ms
[altmap]
(pin_user_pages_fast 2M pages) get:~524ms put:~525 ms -> get: ~127ms put:~71ms

 $ gup_test -f /dev/dax1.0 -m 129022 -r 10 -S -a -n 512 -w
(pin_user_pages_fast 2M pages) put:~513 ms -> put:~188 ms
[altmap with -m 127004]
(pin_user_pages_fast 2M pages) get:~4.1 secs put:~4.12 secs -> get:~1sec 
put:~563ms

.. as well as unpin_user_page_range_dirty_lock() being just as effective
as THP/hugetlb[0] pages.

[0] 
https://lore.kernel.org/linux-mm/[email protected]/

Signed-off-by: Joao Martins <[email protected]>
Reviewed-by: Dan Williams <[email protected]>
---
 drivers/dax/device.c | 57 ++++++++++++++++++++++++++++++++++----------
 1 file changed, 44 insertions(+), 13 deletions(-)

diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index a65c67ab5ee0..0c2ac97d397d 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -192,6 +192,42 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax 
*dev_dax,
 }
 #endif /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
 
+static void set_page_mapping(struct vm_fault *vmf, pfn_t pfn,
+                            unsigned long fault_size,
+                            struct address_space *f_mapping)
+{
+       unsigned long i;
+       pgoff_t pgoff;
+
+       pgoff = linear_page_index(vmf->vma, ALIGN(vmf->address, fault_size));
+
+       for (i = 0; i < fault_size / PAGE_SIZE; i++) {
+               struct page *page;
+
+               page = pfn_to_page(pfn_t_to_pfn(pfn) + i);
+               if (page->mapping)
+                       continue;
+               page->mapping = f_mapping;
+               page->index = pgoff + i;
+       }
+}
+
+static void set_compound_mapping(struct vm_fault *vmf, pfn_t pfn,
+                                unsigned long fault_size,
+                                struct address_space *f_mapping)
+{
+       struct page *head;
+
+       head = pfn_to_page(pfn_t_to_pfn(pfn));
+       head = compound_head(head);
+       if (head->mapping)
+               return;
+
+       head->mapping = f_mapping;
+       head->index = linear_page_index(vmf->vma,
+                       ALIGN(vmf->address, fault_size));
+}
+
 static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
                enum page_entry_size pe_size)
 {
@@ -225,8 +261,7 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
        }
 
        if (rc == VM_FAULT_NOPAGE) {
-               unsigned long i;
-               pgoff_t pgoff;
+               struct dev_pagemap *pgmap = dev_dax->pgmap;
 
                /*
                 * In the device-dax case the only possibility for a
@@ -234,17 +269,10 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
                 * mapped. No need to consider the zero page, or racing
                 * conflicting mappings.
                 */
-               pgoff = linear_page_index(vmf->vma,
-                               ALIGN(vmf->address, fault_size));
-               for (i = 0; i < fault_size / PAGE_SIZE; i++) {
-                       struct page *page;
-
-                       page = pfn_to_page(pfn_t_to_pfn(pfn) + i);
-                       if (page->mapping)
-                               continue;
-                       page->mapping = filp->f_mapping;
-                       page->index = pgoff + i;
-               }
+               if (pgmap->vmemmap_shift)
+                       set_compound_mapping(vmf, pfn, fault_size, 
filp->f_mapping);
+               else
+                       set_page_mapping(vmf, pfn, fault_size, filp->f_mapping);
        }
        dax_read_unlock(id);
 
@@ -439,6 +467,9 @@ int dev_dax_probe(struct dev_dax *dev_dax)
        }
 
        pgmap->type = MEMORY_DEVICE_GENERIC;
+       if (dev_dax->align > PAGE_SIZE)
+               pgmap->vmemmap_shift =
+                       order_base_2(dev_dax->align >> PAGE_SHIFT);
        addr = devm_memremap_pages(dev, pgmap);
        if (IS_ERR(addr))
                return PTR_ERR(addr);
-- 
2.17.2


Reply via email to