Re: [PATCH 1/2] x86: mm: ptdump: Calculate effective permissions correctly
On 27/05/2020 16:15, Jan Beulich wrote: On 21.05.2020 17:23, Steven Price wrote: By switching the x86 page table dump code to use the generic code the effective permissions are no longer calculated correctly because the note_page() function is only called for *leaf* entries. To calculate the actual effective permissions it is necessary to observe the full hierarchy of the page tree. Introduce a new callback for ptdump which is called for every entry and can therefore update the prot_levels array correctly. note_page() can then simply access the appropriate element in the array. Reported-by: Jan Beulich Fixes: 2ae27137b2db ("x86: mm: convert dump_pagetables to use walk_page_range") Signed-off-by: Steven Price This (with the later correction) and the 2nd patch Tested-by: Jan Beulich It allowed me to go and finally find why under Xen there was still a single W+X mapping left - another bug, another patch. Thanks, Jan Thanks for testing (and sorry for breaking it in the first place)! Steve
Re: [PATCH 1/2] x86: mm: ptdump: Calculate effective permissions correctly
On 21.05.2020 17:23, Steven Price wrote: > By switching the x86 page table dump code to use the generic code the > effective permissions are no longer calculated correctly because the > note_page() function is only called for *leaf* entries. To calculate the > actual effective permissions it is necessary to observe the full > hierarchy of the page tree. > > Introduce a new callback for ptdump which is called for every entry and > can therefore update the prot_levels array correctly. note_page() can > then simply access the appropriate element in the array. > > Reported-by: Jan Beulich > Fixes: 2ae27137b2db ("x86: mm: convert dump_pagetables to use > walk_page_range") > Signed-off-by: Steven Price This (with the later correction) and the 2nd patch Tested-by: Jan Beulich It allowed me to go and finally find why under Xen there was still a single W+X mapping left - another bug, another patch. Thanks, Jan
Re: [PATCH 1/2] x86: mm: ptdump: Calculate effective permissions correctly
On 22/05/2020 19:07, Qian Cai wrote: On Thu, May 21, 2020 at 04:23:07PM +0100, Steven Price wrote: --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -249,10 +249,22 @@ static void note_wx(struct pg_state *st, unsigned long addr) @@ -270,16 +282,10 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, struct seq_file *m = st->seq; new_prot = val & PTE_FLAGS_MASK; + new_eff = st->prot_levels[level]; This will trigger, .config (if ever matters): https://raw.githubusercontent.com/cailca/linux-mm/master/x86.config [ 104.532621] UBSAN: array-index-out-of-bounds in arch/x86/mm/dump_pagetables.c:284:27 [ 104.542620] index -1 is out of range for type 'pgprotval_t [5]' Doh! In this case the result (new_eff) is never actually used: the -1 is used just to flush the last proper entry out, so in this case val == 0 which means the following statement sets new_eff = 0. But obviously an out-of-bounds access isn't great. Thanks for testing! The following patch should fix it up by making the assignment conditional on val != 0. Andrew: Would you like me to resend, or can you squash in the below? Steve -8< diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 17aa7ac94a34..ea9010113f69 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -282,10 +282,10 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, struct seq_file *m = st->seq; new_prot = val & PTE_FLAGS_MASK; - new_eff = st->prot_levels[level]; - if (!val) new_eff = 0; + else + new_eff = st->prot_levels[level]; /* * If we have a "break" in the series, we need to flush the state that
Re: [PATCH 1/2] x86: mm: ptdump: Calculate effective permissions correctly
On Thu, May 21, 2020 at 04:23:07PM +0100, Steven Price wrote: > --- a/arch/x86/mm/dump_pagetables.c > +++ b/arch/x86/mm/dump_pagetables.c > @@ -249,10 +249,22 @@ static void note_wx(struct pg_state *st, unsigned long > addr) > @@ -270,16 +282,10 @@ static void note_page(struct ptdump_state *pt_st, > unsigned long addr, int level, > struct seq_file *m = st->seq; > > new_prot = val & PTE_FLAGS_MASK; > + new_eff = st->prot_levels[level]; This will trigger, .config (if ever matters): https://raw.githubusercontent.com/cailca/linux-mm/master/x86.config [ 104.532621] UBSAN: array-index-out-of-bounds in arch/x86/mm/dump_pagetables.c:284:27 [ 104.542620] index -1 is out of range for type 'pgprotval_t [5]' [ 104.552624] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.7.0-rc6-next-20200522+ #5 [ 104.560865] Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019 [ 104.562604] Call Trace: [ 104.562604] dump_stack+0xa7/0xea [ 104.562604] ubsan_epilogue+0x9/0x45 [ 104.562604] __ubsan_handle_out_of_bounds.cold.12+0x2b/0x36 [ 104.562604] ? __kasan_check_write+0x14/0x20 [ 104.562604] note_page+0x91f/0xa00 [ 104.562604] ? down_read_non_owner+0x330/0x330 [ 104.562604] ? match_held_lock+0x20/0x250 [ 104.562604] ptdump_walk_pgd+0xa1/0xb0 [ 104.562604] ptdump_walk_pgd_level_core+0x19f/0x200 [ 104.562604] ? up+0x46/0x60 [ 104.562604] ? hugetlb_get_unmapped_area+0x590/0x590 [ 104.562604] ? lock_downgrade+0x3e0/0x3e0 [ 104.562604] ? _raw_spin_unlock_irqrestore+0x44/0x50 [ 104.562604] ? ptdump_walk_pgd_level_debugfs+0x80/0x80 [ 104.562604] ? ptdump_walk_pgd_level_core+0x200/0x200 [ 104.562604] ? virt_efi_set_variable_nonblocking+0xf1/0x110 [ 104.562604] ptdump_walk_pgd_level+0x32/0x40 [ 104.562604] efi_dump_pagetable+0x17/0x19 [ 104.562604] efi_enter_virtual_mode+0x3e5/0x3f5 [ 104.562604] start_kernel+0x848/0x8f6 [ 104.562604] ? __early_make_pgtable+0x2cb/0x314 [ 104.562604] ? thread_stack_cache_init+0xb/0xb [ 104.562604] ? early_make_pgtable+0x21/0x23 [ 104.562604] ? early_idt_handler_common+0x35/0x4c [ 104.562604] x86_64_start_reservations+0x24/0x26 [ 104.562604] x86_64_start_kernel+0xf4/0xfb [ 104.562604] secondary_startup_64+0xb6/0xc0 > > - if (level > 0) { > - new_eff = effective_prot(st->prot_levels[level - 1], > - new_prot); > - } else { > - new_eff = new_prot; > - } > - > - if (level >= 0) > - st->prot_levels[level] = new_eff; > + if (!val) > + new_eff = 0; > > /* >* If we have a "break" in the series, we need to flush the state that
[PATCH 1/2] x86: mm: ptdump: Calculate effective permissions correctly
By switching the x86 page table dump code to use the generic code the effective permissions are no longer calculated correctly because the note_page() function is only called for *leaf* entries. To calculate the actual effective permissions it is necessary to observe the full hierarchy of the page tree. Introduce a new callback for ptdump which is called for every entry and can therefore update the prot_levels array correctly. note_page() can then simply access the appropriate element in the array. Reported-by: Jan Beulich Fixes: 2ae27137b2db ("x86: mm: convert dump_pagetables to use walk_page_range") Signed-off-by: Steven Price --- arch/x86/mm/dump_pagetables.c | 31 +++ include/linux/ptdump.h| 1 + mm/ptdump.c | 17 - 3 files changed, 36 insertions(+), 13 deletions(-) diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 69309cd56fdf..199bbb7fbd79 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -249,10 +249,22 @@ static void note_wx(struct pg_state *st, unsigned long addr) (void *)st->start_address); } -static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2) +static void effective_prot(struct ptdump_state *pt_st, int level, u64 val) { - return (prot1 & prot2 & (_PAGE_USER | _PAGE_RW)) | - ((prot1 | prot2) & _PAGE_NX); + struct pg_state *st = container_of(pt_st, struct pg_state, ptdump); + pgprotval_t prot = val & PTE_FLAGS_MASK; + pgprotval_t effective; + + if (level > 0) { + pgprotval_t higher_prot = st->prot_levels[level - 1]; + + effective = (higher_prot & prot & (_PAGE_USER | _PAGE_RW)) | + ((higher_prot | prot) & _PAGE_NX); + } else { + effective = prot; + } + + st->prot_levels[level] = effective; } /* @@ -270,16 +282,10 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, struct seq_file *m = st->seq; new_prot = val & PTE_FLAGS_MASK; + new_eff = st->prot_levels[level]; - if (level > 0) { - new_eff = effective_prot(st->prot_levels[level - 1], -new_prot); - } else { - new_eff = new_prot; - } - - if (level >= 0) - st->prot_levels[level] = new_eff; + if (!val) + new_eff = 0; /* * If we have a "break" in the series, we need to flush the state that @@ -374,6 +380,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, struct pg_state st = { .ptdump = { .note_page = note_page, + .effective_prot = effective_prot, .range = ptdump_ranges }, .level = -1, diff --git a/include/linux/ptdump.h b/include/linux/ptdump.h index a67065c403c3..ac01502763bf 100644 --- a/include/linux/ptdump.h +++ b/include/linux/ptdump.h @@ -14,6 +14,7 @@ struct ptdump_state { /* level is 0:PGD to 4:PTE, or -1 if unknown */ void (*note_page)(struct ptdump_state *st, unsigned long addr, int level, unsigned long val); + void (*effective_prot)(struct ptdump_state *st, int level, u64 val); const struct ptdump_range *range; }; diff --git a/mm/ptdump.c b/mm/ptdump.c index 26208d0d03b7..f4ce916f5602 100644 --- a/mm/ptdump.c +++ b/mm/ptdump.c @@ -36,6 +36,9 @@ static int ptdump_pgd_entry(pgd_t *pgd, unsigned long addr, return note_kasan_page_table(walk, addr); #endif + if (st->effective_prot) + st->effective_prot(st, 0, pgd_val(val)); + if (pgd_leaf(val)) st->note_page(st, addr, 0, pgd_val(val)); @@ -53,6 +56,9 @@ static int ptdump_p4d_entry(p4d_t *p4d, unsigned long addr, return note_kasan_page_table(walk, addr); #endif + if (st->effective_prot) + st->effective_prot(st, 1, p4d_val(val)); + if (p4d_leaf(val)) st->note_page(st, addr, 1, p4d_val(val)); @@ -70,6 +76,9 @@ static int ptdump_pud_entry(pud_t *pud, unsigned long addr, return note_kasan_page_table(walk, addr); #endif + if (st->effective_prot) + st->effective_prot(st, 2, pud_val(val)); + if (pud_leaf(val)) st->note_page(st, addr, 2, pud_val(val)); @@ -87,6 +96,8 @@ static int ptdump_pmd_entry(pmd_t *pmd, unsigned long addr, return note_kasan_page_table(walk, addr); #endif + if (st->effective_prot) + st->effective_prot(st, 3, pmd_val(val)); if (pmd_leaf(val)) st->note_page(st, addr, 3, pmd_val(val)); @@ -97,8 +108,12 @@ static int ptdump_pte_entry(pte_t *pte, unsigned long addr, unsigned long