[PATCH v2 20/20] mm, x86: get_user_pages() for dax mappings

2015-10-09 Thread Dan Williams
A dax mapping establishes a pte with _PAGE_DEVMAP set when the driver
has established a devm_memremap_pages() mapping, i.e. when the pfn_t
return from ->direct_access() has PFN_DEV and PFN_MAP set.  Later, when
encountering _PAGE_DEVMAP during a page table walk we lookup and pin a
struct dev_pagemap instance to keep the result of pfn_to_page() valid
until put_page().

Cc: Dave Hansen 
Cc: Andrew Morton 
Cc: Christoph Hellwig 
Cc: Ross Zwisler 
Cc: Thomas Gleixner 
Cc: Ingo Molnar 
Cc: H. Peter Anvin 
Cc: Jeff Moyer 
Cc: Peter Zijlstra 
Cc: Matthew Wilcox 
Cc: Alexander Viro 
Cc: Dave Chinner 
Signed-off-by: Dan Williams 
---
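Not part of the diff below: a minimal sketch of the pin/translate/unpin pattern the
gup paths in this patch apply whenever they hit a _PAGE_DEVMAP entry. The helper name
pin_devmap_pfn() is hypothetical and exists only for illustration; get_dev_pagemap(),
put_dev_pagemap(), pfn_to_page(), get_page() and SetPageReferenced() are used as in
the hunks below, assuming (per the changelog) that the page reference taken here is
what keeps pfn_to_page() results valid until put_page().

/*
 * Illustrative sketch only (hypothetical helper): the sequence the
 * gup fast path follows for one device pfn.
 */
static int pin_devmap_pfn(unsigned long pfn, struct page **pages, int *nr)
{
	struct dev_pagemap *pgmap;
	struct page *page;

	/* pin the pagemap so pfn_to_page() is valid for device memory */
	pgmap = get_dev_pagemap(pfn, NULL);
	if (unlikely(!pgmap))
		return 0;	/* driver tore down the mapping; caller unwinds */

	page = pfn_to_page(pfn);
	get_page(page);		/* reference held until the caller's put_page() */
	put_dev_pagemap(pgmap);	/* transient pagemap pin no longer needed */

	SetPageReferenced(page);
	pages[(*nr)++] = page;
	return 1;
}

The transient dev_pagemap reference only needs to span the pfn_to_page()/get_page()
window; after that the elevated page count carries the pin until put_page().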
 arch/ia64/include/asm/pgtable.h |1 +
 arch/x86/include/asm/pgtable.h  |2 +
 arch/x86/mm/gup.c   |   56 +--
 include/linux/mm.h  |   40 +++-
 mm/gup.c|   11 +++-
 mm/hugetlb.c|   18 -
 mm/swap.c   |   15 ++
 7 files changed, 124 insertions(+), 19 deletions(-)

diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h
index 9f3ed9ee8f13..81d2af23958f 100644
--- a/arch/ia64/include/asm/pgtable.h
+++ b/arch/ia64/include/asm/pgtable.h
@@ -273,6 +273,7 @@ extern unsigned long VMALLOC_END;
 #define pmd_clear(pmdp)(pmd_val(*(pmdp)) = 0UL)
 #define pmd_page_vaddr(pmd)	((unsigned long) __va(pmd_val(pmd) & _PFN_MASK))
 #define pmd_page(pmd)		virt_to_page((pmd_val(pmd) + PAGE_OFFSET))
+#define pmd_pfn(pmd)   (pmd_val(pmd) >> PAGE_SHIFT)
 
 #define pud_none(pud)  (!pud_val(pud))
 #define pud_bad(pud)   (!ia64_phys_addr_valid(pud_val(pud)))
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 84d1346e1cda..d29dc7b4924b 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -461,7 +461,7 @@ static inline int pte_present(pte_t a)
 #define pte_devmap pte_devmap
 static inline int pte_devmap(pte_t a)
 {
-   return pte_flags(a) & _PAGE_DEVMAP;
+   return (pte_flags(a) & _PAGE_DEVMAP) == _PAGE_DEVMAP;
 }
 
 #define pte_accessible pte_accessible
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 81bf3d2af3eb..7254ba4f791d 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -63,6 +63,16 @@ retry:
 #endif
 }
 
+static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages)
+{
+   while ((*nr) - nr_start) {
+   struct page *page = pages[--(*nr)];
+
+   ClearPageReferenced(page);
+   put_page(page);
+   }
+}
+
 /*
  * The performance critical leaf functions are made noinline otherwise gcc
  * inlines everything into a single function which results in too much
@@ -71,7 +81,9 @@ retry:
 static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
 {
+   struct dev_pagemap *pgmap = NULL;
unsigned long mask;
+   int nr_start = *nr;
pte_t *ptep;
 
mask = _PAGE_PRESENT|_PAGE_USER;
@@ -89,13 +101,21 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
return 0;
}
 
-   if ((pte_flags(pte) & (mask | _PAGE_SPECIAL)) != mask) {
+   page = pte_page(pte);
+   if (pte_devmap(pte)) {
+   pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
+   if (unlikely(!pgmap)) {
+   undo_dev_pagemap(nr, nr_start, pages);
+   pte_unmap(ptep);
+   return 0;
+   }
+   } else if ((pte_flags(pte) & (mask | _PAGE_SPECIAL)) != mask) {
pte_unmap(ptep);
return 0;
}
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
-   page = pte_page(pte);
get_page(page);
+   put_dev_pagemap(pgmap);
SetPageReferenced(page);
pages[*nr] = page;
(*nr)++;
@@ -114,6 +134,32 @@ static inline void get_head_page_multiple(struct page *page, int nr)
SetPageReferenced(page);
 }
 
+static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
+   unsigned long end, struct page **pages, int *nr)
+{
+   int nr_start = *nr;
+   unsigned long pfn = pmd_pfn(pmd);
+   struct dev_pagemap *pgmap = NULL;
+
+   pfn += (addr & ~PMD_MASK) >> PAGE_SHIFT;
+   do {
+   struct page *page = pfn_to_page(pfn);
+
+   pgmap = get_dev_pagemap(pfn, pgmap);
+   if (unlikely(!pgmap)) {
+   undo_dev_pagemap(nr, nr_start, pages);
+   return 0;
+   }
+   SetPageReferenced(page);
+   pages[*nr] = page;
+		get_page(page);
+		put_dev_pagemap(pgmap);
+		(*nr)++;
+		pfn++;
+	} while (addr += PAGE_SIZE, addr != end);
+	return 1;
+}