[PATCH PTI v2 5/6] x86/pti: Map the vsyscall page if needed
Make VSYSCALLs work fully in PTI mode. Signed-off-by: Andy Lutomirski--- arch/x86/entry/vsyscall/vsyscall_64.c | 6 ++-- arch/x86/include/asm/pgtable.h| 6 +++- arch/x86/include/asm/pgtable_64.h | 9 +++-- arch/x86/include/asm/vsyscall.h | 1 + arch/x86/mm/pti.c | 63 +++ 5 files changed, 78 insertions(+), 7 deletions(-) diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index a06f2ae09ad6..e4a6fe8354f0 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -343,14 +343,14 @@ int in_gate_area_no_mm(unsigned long addr) * vsyscalls but leave the page not present. If so, we skip calling * this. */ -static void __init set_vsyscall_pgtable_user_bits(void) +void __init set_vsyscall_pgtable_user_bits(pgd_t *root) { pgd_t *pgd; p4d_t *p4d; pud_t *pud; pmd_t *pmd; - pgd = pgd_offset_k(VSYSCALL_ADDR); + pgd = pgd_offset_pgd(root, VSYSCALL_ADDR); pgd->pgd |= _PAGE_USER; p4d = p4d_offset(pgd, VSYSCALL_ADDR); #if CONFIG_PGTABLE_LEVELS >= 5 @@ -372,7 +372,7 @@ void __init map_vsyscall(void) vsyscall_mode == NATIVE ? PAGE_KERNEL_VSYSCALL : PAGE_KERNEL_VVAR); - set_vsyscall_pgtable_user_bits(); + set_vsyscall_pgtable_user_bits(swapper_pg_dir); } BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) != diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 83c0c77e7365..a8a8fc15ca16 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -920,7 +920,11 @@ static inline int pgd_none(pgd_t pgd) * pgd_offset() returns a (pgd_t *) * pgd_index() is used get the offset into the pgd page's array of pgd_t's; */ -#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address))) +#define pgd_offset_pgd(pgd, address) (pgd + pgd_index((address))) +/* + * a shortcut to get a pgd_t in a given mm + */ +#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address)) /* * a shortcut which implies the use of the kernel's pgd, instead * of a process's diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index be8d086de927..a2fb3f8bc985 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -220,11 +220,14 @@ static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) * the wrong CR3. * * As exceptions, we don't set NX if: -* - this is EFI or similar, the kernel may execute from it +* - _PAGE_USER is not set. This could be an executable +* EFI runtime mapping or something similar, and the kernel +* may execute from it * - we don't have NX support -* - we're clearing the PGD (i.e. pgd.pgd == 0). +* - we're clearing the PGD (i.e. the new pgd is not present). */ - if ((pgd.pgd & _PAGE_USER) && (__supported_pte_mask & _PAGE_NX)) + if ((pgd.pgd & (_PAGE_USER|_PAGE_PRESENT)) == (_PAGE_USER|_PAGE_PRESENT) && + (__supported_pte_mask & _PAGE_NX)) pgd.pgd |= _PAGE_NX; } else { /* diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h index d9a7c659009c..b986b2ca688a 100644 --- a/arch/x86/include/asm/vsyscall.h +++ b/arch/x86/include/asm/vsyscall.h @@ -7,6 +7,7 @@ #ifdef CONFIG_X86_VSYSCALL_EMULATION extern void map_vsyscall(void); +extern void set_vsyscall_pgtable_user_bits(pgd_t *root); /* * Called on instruction fetch fault in vsyscall page. diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index f48645d2f3fd..a9c53d21f0a8 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -38,6 +38,7 @@ #include #include +#include #include #include #include @@ -133,6 +134,48 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) return pmd_offset(pud, address); } +/* + * Walk the shadow copy of the page tables (optionally) trying to allocate + * page table pages on the way down. Does not support large pages. + * + * Note: this is only used when mapping *new* kernel data into the + * user/shadow page tables. It is never used for userspace data. + * + * Returns a pointer to a PTE on success, or NULL on failure. + */ +static pte_t *pti_user_pagetable_walk_pte(unsigned long address) +{ + gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); + pmd_t *pmd = pti_user_pagetable_walk_pmd(address); + pte_t *pte; + + /* We can't do anything sensible if we hit a large mapping. */ + if (pmd_large(*pmd)) { + WARN_ON(1); + return NULL; +
[PATCH PTI v2 5/6] x86/pti: Map the vsyscall page if needed
Make VSYSCALLs work fully in PTI mode. Signed-off-by: Andy Lutomirski --- arch/x86/entry/vsyscall/vsyscall_64.c | 6 ++-- arch/x86/include/asm/pgtable.h| 6 +++- arch/x86/include/asm/pgtable_64.h | 9 +++-- arch/x86/include/asm/vsyscall.h | 1 + arch/x86/mm/pti.c | 63 +++ 5 files changed, 78 insertions(+), 7 deletions(-) diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index a06f2ae09ad6..e4a6fe8354f0 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -343,14 +343,14 @@ int in_gate_area_no_mm(unsigned long addr) * vsyscalls but leave the page not present. If so, we skip calling * this. */ -static void __init set_vsyscall_pgtable_user_bits(void) +void __init set_vsyscall_pgtable_user_bits(pgd_t *root) { pgd_t *pgd; p4d_t *p4d; pud_t *pud; pmd_t *pmd; - pgd = pgd_offset_k(VSYSCALL_ADDR); + pgd = pgd_offset_pgd(root, VSYSCALL_ADDR); pgd->pgd |= _PAGE_USER; p4d = p4d_offset(pgd, VSYSCALL_ADDR); #if CONFIG_PGTABLE_LEVELS >= 5 @@ -372,7 +372,7 @@ void __init map_vsyscall(void) vsyscall_mode == NATIVE ? PAGE_KERNEL_VSYSCALL : PAGE_KERNEL_VVAR); - set_vsyscall_pgtable_user_bits(); + set_vsyscall_pgtable_user_bits(swapper_pg_dir); } BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) != diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 83c0c77e7365..a8a8fc15ca16 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -920,7 +920,11 @@ static inline int pgd_none(pgd_t pgd) * pgd_offset() returns a (pgd_t *) * pgd_index() is used get the offset into the pgd page's array of pgd_t's; */ -#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address))) +#define pgd_offset_pgd(pgd, address) (pgd + pgd_index((address))) +/* + * a shortcut to get a pgd_t in a given mm + */ +#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address)) /* * a shortcut which implies the use of the kernel's pgd, instead * of a process's diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index be8d086de927..a2fb3f8bc985 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -220,11 +220,14 @@ static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) * the wrong CR3. * * As exceptions, we don't set NX if: -* - this is EFI or similar, the kernel may execute from it +* - _PAGE_USER is not set. This could be an executable +* EFI runtime mapping or something similar, and the kernel +* may execute from it * - we don't have NX support -* - we're clearing the PGD (i.e. pgd.pgd == 0). +* - we're clearing the PGD (i.e. the new pgd is not present). */ - if ((pgd.pgd & _PAGE_USER) && (__supported_pte_mask & _PAGE_NX)) + if ((pgd.pgd & (_PAGE_USER|_PAGE_PRESENT)) == (_PAGE_USER|_PAGE_PRESENT) && + (__supported_pte_mask & _PAGE_NX)) pgd.pgd |= _PAGE_NX; } else { /* diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h index d9a7c659009c..b986b2ca688a 100644 --- a/arch/x86/include/asm/vsyscall.h +++ b/arch/x86/include/asm/vsyscall.h @@ -7,6 +7,7 @@ #ifdef CONFIG_X86_VSYSCALL_EMULATION extern void map_vsyscall(void); +extern void set_vsyscall_pgtable_user_bits(pgd_t *root); /* * Called on instruction fetch fault in vsyscall page. diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index f48645d2f3fd..a9c53d21f0a8 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -38,6 +38,7 @@ #include #include +#include #include #include #include @@ -133,6 +134,48 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) return pmd_offset(pud, address); } +/* + * Walk the shadow copy of the page tables (optionally) trying to allocate + * page table pages on the way down. Does not support large pages. + * + * Note: this is only used when mapping *new* kernel data into the + * user/shadow page tables. It is never used for userspace data. + * + * Returns a pointer to a PTE on success, or NULL on failure. + */ +static pte_t *pti_user_pagetable_walk_pte(unsigned long address) +{ + gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); + pmd_t *pmd = pti_user_pagetable_walk_pmd(address); + pte_t *pte; + + /* We can't do anything sensible if we hit a large mapping. */ + if (pmd_large(*pmd)) { + WARN_ON(1); + return NULL; + } + +