[PATCH PTI v2 5/6] x86/pti: Map the vsyscall page if needed

2017-12-10, Andy Lutomirski
Make VSYSCALLs work fully in PTI mode.

Signed-off-by: Andy Lutomirski 
---
 arch/x86/entry/vsyscall/vsyscall_64.c |  6 ++--
 arch/x86/include/asm/pgtable.h        |  6 +++-
 arch/x86/include/asm/pgtable_64.h     |  9 +++--
 arch/x86/include/asm/vsyscall.h       |  1 +
 arch/x86/mm/pti.c                     | 63 +++
 5 files changed, 78 insertions(+), 7 deletions(-)
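
Background for the diff below: with PTI there are two page-table roots --
the kernel's swapper_pg_dir and the user/shadow copy -- and the vsyscall
fixmap must carry _PAGE_USER at every level of whichever copy it is reached
through, so the walk is parameterized by its root.  As a standalone sketch
(simplified types and 4-level paging constants assumed; this is not kernel
code), the new pgd_offset_pgd() helper amounts to:

	#include <stdint.h>

	#define PGDIR_SHIFT	39	/* 4-level paging */
	#define PTRS_PER_PGD	512

	typedef struct { uint64_t pgd; } pgd_t;

	static inline unsigned long pgd_index(unsigned long address)
	{
		return (address >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1);
	}

	/* Index into an explicit root rather than assuming an mm or the
	 * kernel's swapper_pg_dir. */
	static inline pgd_t *pgd_offset_pgd(pgd_t *root, unsigned long address)
	{
		return root + pgd_index(address);
	}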

diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index a06f2ae09ad6..e4a6fe8354f0 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -343,14 +343,14 @@ int in_gate_area_no_mm(unsigned long addr)
  * vsyscalls but leave the page not present.  If so, we skip calling
  * this.
  */
-static void __init set_vsyscall_pgtable_user_bits(void)
+void __init set_vsyscall_pgtable_user_bits(pgd_t *root)
 {
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
 
-   pgd = pgd_offset_k(VSYSCALL_ADDR);
+   pgd = pgd_offset_pgd(root, VSYSCALL_ADDR);
pgd->pgd |= _PAGE_USER;
p4d = p4d_offset(pgd, VSYSCALL_ADDR);
 #if CONFIG_PGTABLE_LEVELS >= 5
@@ -372,7 +372,7 @@ void __init map_vsyscall(void)
 vsyscall_mode == NATIVE
 ? PAGE_KERNEL_VSYSCALL
 : PAGE_KERNEL_VVAR);
-   set_vsyscall_pgtable_user_bits();
+   set_vsyscall_pgtable_user_bits(swapper_pg_dir);
}
 
BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
	     (unsigned long)VSYSCALL_ADDR);
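
Passing the root explicitly means the same helper can decorate both copies
of the page tables.  A hedged usage sketch -- the second call is an
assumption about how the rest of the series invokes the helper from pti.c,
not something quoted in this mail:

	/* Kernel copy, as in map_vsyscall() above: */
	set_vsyscall_pgtable_user_bits(swapper_pg_dir);

	/* User/shadow copy during PTI setup (assumed call site): */
	set_vsyscall_pgtable_user_bits(kernel_to_user_pgdp(swapper_pg_dir));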
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 83c0c77e7365..a8a8fc15ca16 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -920,7 +920,11 @@ static inline int pgd_none(pgd_t pgd)
  * pgd_offset() returns a (pgd_t *)
 * pgd_index() is used to get the offset into the pgd page's array of pgd_t's;
  */
-#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
+#define pgd_offset_pgd(pgd, address) (pgd + pgd_index((address)))
+/*
+ * a shortcut to get a pgd_t in a given mm
+ */
+#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address))
 /*
  * a shortcut which implies the use of the kernel's pgd, instead
  * of a process's
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index be8d086de927..a2fb3f8bc985 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -220,11 +220,14 @@ static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
 * the wrong CR3.
 *
 * As exceptions, we don't set NX if:
-*  - this is EFI or similar, the kernel may execute from it
+*  - _PAGE_USER is not set.  This could be an executable
+* EFI runtime mapping or something similar, and the kernel
+* may execute from it
 *  - we don't have NX support
-*  - we're clearing the PGD (i.e. pgd.pgd == 0).
+*  - we're clearing the PGD (i.e. the new pgd is not present).
 */
-   if ((pgd.pgd & _PAGE_USER) && (__supported_pte_mask & _PAGE_NX))
+   if ((pgd.pgd & (_PAGE_USER|_PAGE_PRESENT)) == (_PAGE_USER|_PAGE_PRESENT) &&
+   (__supported_pte_mask & _PAGE_NX))
pgd.pgd |= _PAGE_NX;
} else {
/*
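
The tightened test matters for entries that carry _PAGE_USER without
_PAGE_PRESENT: only entries that are both user and present should grow NX.
A standalone sketch of the predicate (flag values hard-coded for
illustration; this is not kernel code):

	#include <assert.h>
	#include <stdint.h>

	#define _PAGE_PRESENT	0x0000000000000001ULL	/* bit 0 */
	#define _PAGE_USER	0x0000000000000004ULL	/* bit 2 */
	#define _PAGE_NX	0x8000000000000000ULL	/* bit 63 */

	static uint64_t maybe_set_nx(uint64_t pgd, uint64_t supported)
	{
		if ((pgd & (_PAGE_USER | _PAGE_PRESENT)) ==
		    (_PAGE_USER | _PAGE_PRESENT) && (supported & _PAGE_NX))
			pgd |= _PAGE_NX;
		return pgd;
	}

	int main(void)
	{
		/* Clearing a PGD: a zero entry must stay zero. */
		assert(maybe_set_nx(0, _PAGE_NX) == 0);
		/* Not-present entries are left alone even with _PAGE_USER set. */
		assert(maybe_set_nx(_PAGE_USER, _PAGE_NX) == _PAGE_USER);
		/* Present user entries still get NX, guarding the wrong-CR3 case. */
		assert(maybe_set_nx(_PAGE_USER | _PAGE_PRESENT, _PAGE_NX)
		       & _PAGE_NX);
		return 0;
	}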
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index d9a7c659009c..b986b2ca688a 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -7,6 +7,7 @@
 
 #ifdef CONFIG_X86_VSYSCALL_EMULATION
 extern void map_vsyscall(void);
+extern void set_vsyscall_pgtable_user_bits(pgd_t *root);
 
 /*
  * Called on instruction fetch fault in vsyscall page.
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index f48645d2f3fd..a9c53d21f0a8 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -38,6 +38,7 @@
 
 #include 
 #include 
+#include <asm/vsyscall.h>
 #include 
 #include 
 #include 
@@ -133,6 +134,48 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
return pmd_offset(pud, address);
 }
 
+/*
+ * Walk the shadow copy of the page tables (optionally) trying to allocate
+ * page table pages on the way down.  Does not support large pages.
+ *
+ * Note: this is only used when mapping *new* kernel data into the
+ * user/shadow page tables.  It is never used for userspace data.
+ *
+ * Returns a pointer to a PTE on success, or NULL on failure.
+ */
+static pte_t *pti_user_pagetable_walk_pte(unsigned long address)
+{
+   gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+   pmd_t *pmd = pti_user_pagetable_walk_pmd(address);
+   pte_t *pte;
+
+   /* We can't do anything sensible if we hit a large mapping. */
+   if (pmd_large(*pmd)) {
+   WARN_ON(1);
+   return NULL;
+   }
+
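
The archive cuts the mail off here, mid-function.  For completeness, here
is a hedged reconstruction of how pti_user_pagetable_walk_pte() plausibly
continues -- allocating a zeroed PTE page when the PMD is empty and
refusing to hand back a PTE already marked _PAGE_USER.  This is an
illustration based on the function's stated purpose, not the missing patch
text (the upstream code also guards the set_pmd() against races, omitted
here for brevity):

	if (pmd_none(*pmd)) {
		/* Install a fresh, zeroed PTE page under this PMD. */
		unsigned long new_pte_page = __get_free_page(gfp);
		if (!new_pte_page)
			return NULL;
		set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
	}

	pte = pte_offset_kernel(pmd, address);
	/* This walk is only for kernel data; never hand back a user PTE. */
	if (pte_flags(*pte) & _PAGE_USER) {
		WARN_ONCE(1, "attempt to walk to user pte\n");
		return NULL;
	}
	return pte;
}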