Rather than providing our own callbacks for walking the page tables,
switch to using the generic version.

Signed-off-by: Steven Price <steven.pr...@arm.com>
---
 arch/x86/Kconfig              |   1 +
 arch/x86/Kconfig.debug        |  20 +--
 arch/x86/mm/Makefile          |   4 +-
 arch/x86/mm/dump_pagetables.c | 297 +++++++---------------------------
 4 files changed, 62 insertions(+), 260 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c1f9b3cf437c..122c24055f02 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -106,6 +106,7 @@ config X86
        select GENERIC_IRQ_RESERVATION_MODE
        select GENERIC_IRQ_SHOW
        select GENERIC_PENDING_IRQ              if SMP
+       select GENERIC_PTDUMP
        select GENERIC_SMP_IDLE_THREAD
        select GENERIC_STRNCPY_FROM_USER
        select GENERIC_STRNLEN_USER
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 15d0fbe27872..dc1dfe213657 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -62,26 +62,10 @@ config EARLY_PRINTK_USB_XDBC
 config MCSAFE_TEST
        def_bool n
 
-config X86_PTDUMP_CORE
-       def_bool n
-
-config X86_PTDUMP
-       tristate "Export kernel pagetable layout to userspace via debugfs"
-       depends on DEBUG_KERNEL
-       select DEBUG_FS
-       select X86_PTDUMP_CORE
-       ---help---
-         Say Y here if you want to show the kernel pagetable layout in a
-         debugfs file. This information is only useful for kernel developers
-         who are working in architecture specific areas of the kernel.
-         It is probably not a good idea to enable this feature in a production
-         kernel.
-         If in doubt, say "N"
-
 config EFI_PGT_DUMP
        bool "Dump the EFI pagetable"
        depends on EFI
-       select X86_PTDUMP_CORE
+       select PTDUMP_CORE
        ---help---
          Enable this if you want to dump the EFI page table before
          enabling virtual mode. This can be used to debug miscellaneous
@@ -90,7 +74,7 @@ config EFI_PGT_DUMP
 
 config DEBUG_WX
        bool "Warn on W+X mappings at boot"
-       select X86_PTDUMP_CORE
+       select PTDUMP_CORE
        ---help---
          Generate a warning if any W+X mappings are found at boot.
 
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 4b101dd6e52f..5233190fc6bf 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -28,8 +28,8 @@ obj-$(CONFIG_X86_PAT)         += pat_rbtree.o
 obj-$(CONFIG_X86_32)           += pgtable_32.o iomap_32.o
 
 obj-$(CONFIG_HUGETLB_PAGE)     += hugetlbpage.o
-obj-$(CONFIG_X86_PTDUMP_CORE)  += dump_pagetables.o
-obj-$(CONFIG_X86_PTDUMP)       += debug_pagetables.o
+obj-$(CONFIG_PTDUMP_CORE)      += dump_pagetables.o
+obj-$(CONFIG_PTDUMP_DEBUGFS)   += debug_pagetables.o
 
 obj-$(CONFIG_HIGHMEM)          += highmem_32.o
 
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index f6b814aaddf7..955824c7cddb 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -20,6 +20,7 @@
 #include <linux/seq_file.h>
 #include <linux/highmem.h>
 #include <linux/pci.h>
+#include <linux/ptdump.h>
 
 #include <asm/e820/types.h>
 #include <asm/pgtable.h>
@@ -30,15 +31,12 @@
  * when a "break" in the continuity is found.
  */
 struct pg_state {
+       struct ptdump_state ptdump;
        int level;
-       pgprot_t current_prot;
+       pgprotval_t current_prot;
        pgprotval_t effective_prot;
-       pgprotval_t effective_prot_pgd;
-       pgprotval_t effective_prot_p4d;
-       pgprotval_t effective_prot_pud;
-       pgprotval_t effective_prot_pmd;
+       pgprotval_t prot_levels[5];
        unsigned long start_address;
-       unsigned long current_address;
        const struct addr_marker *marker;
        unsigned long lines;
        bool to_dmesg;
@@ -179,9 +177,8 @@ static struct addr_marker address_markers[] = {
 /*
  * Print a readable form of a pgprot_t to the seq_file
  */
-static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
+static void printk_prot(struct seq_file *m, pgprotval_t pr, int level, bool dmsg)
 {
-       pgprotval_t pr = pgprot_val(prot);
        static const char * const level_name[] =
                { "cr3", "pgd", "p4d", "pud", "pmd", "pte" };
 
@@ -228,24 +225,11 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
        pt_dump_cont_printf(m, dmsg, "%s\n", level_name[level]);
 }
 
-/*
- * On 64 bits, sign-extend the 48 bit address to 64 bit
- */
-static unsigned long normalize_addr(unsigned long u)
-{
-       int shift;
-       if (!IS_ENABLED(CONFIG_X86_64))
-               return u;
-
-       shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
-       return (signed long)(u << shift) >> shift;
-}
-
-static void note_wx(struct pg_state *st)
+static void note_wx(struct pg_state *st, unsigned long addr)
 {
        unsigned long npages;
 
-       npages = (st->current_address - st->start_address) / PAGE_SIZE;
+       npages = (addr - st->start_address) / PAGE_SIZE;
 
 #ifdef CONFIG_PCI_BIOS
        /*
@@ -253,7 +237,7 @@ static void note_wx(struct pg_state *st)
         * Inform about it, but avoid the warning.
         */
        if (pcibios_enabled && st->start_address >= PAGE_OFFSET + BIOS_BEGIN &&
-           st->current_address <= PAGE_OFFSET + BIOS_END) {
+           addr <= PAGE_OFFSET + BIOS_END) {
                pr_warn_once("x86/mm: PCI BIOS W+X mapping %lu pages\n", npages);
                return;
        }
@@ -264,25 +248,44 @@ static void note_wx(struct pg_state *st)
                  (void *)st->start_address);
 }
 
+static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2)
+{
+       return (prot1 & prot2 & (_PAGE_USER | _PAGE_RW)) |
+              ((prot1 | prot2) & _PAGE_NX);
+}
+
 /*
  * This function gets called on a break in a continuous series
  * of PTE entries; the next one is different so we need to
  * print what we collected so far.
  */
-static void note_page(struct pg_state *st, pgprot_t new_prot,
-                     pgprotval_t new_eff, int level)
+static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
+                     unsigned long val)
 {
-       pgprotval_t prot, cur, eff;
+       struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
+       pgprotval_t new_prot, new_eff;
+       pgprotval_t cur, eff;
        static const char units[] = "BKMGTPE";
        struct seq_file *m = st->seq;
 
+       new_prot = val & PTE_FLAGS_MASK;
+
+       if (level > 1) {
+               new_eff = effective_prot(st->prot_levels[level - 2],
+                                        new_prot);
+       } else {
+               new_eff = new_prot;
+       }
+
+       if (level > 0)
+               st->prot_levels[level-1] = new_eff;
+
        /*
         * If we have a "break" in the series, we need to flush the state that
         * we have now. "break" is either changing perms, levels or
         * address space marker.
         */
-       prot = pgprot_val(new_prot);
-       cur = pgprot_val(st->current_prot);
+       cur = st->current_prot;
        eff = st->effective_prot;
 
        if (!st->level) {
@@ -294,14 +297,14 @@ static void note_page(struct pg_state *st, pgprot_t new_prot,
                st->lines = 0;
                pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n",
                                   st->marker->name);
-       } else if (prot != cur || new_eff != eff || level != st->level ||
-                  st->current_address >= st->marker[1].start_address) {
+       } else if (new_prot != cur || new_eff != eff || level != st->level ||
+                  addr >= st->marker[1].start_address) {
                const char *unit = units;
                unsigned long delta;
                int width = sizeof(unsigned long) * 2;
 
                if (st->check_wx && (eff & _PAGE_RW) && !(eff & _PAGE_NX))
-                       note_wx(st);
+                       note_wx(st, addr);
 
                /*
                 * Now print the actual finished series
@@ -311,9 +314,9 @@ static void note_page(struct pg_state *st, pgprot_t new_prot,
                        pt_dump_seq_printf(m, st->to_dmesg,
                                           "0x%0*lx-0x%0*lx   ",
                                           width, st->start_address,
-                                          width, st->current_address);
+                                          width, addr);
 
-                       delta = st->current_address - st->start_address;
+                       delta = addr - st->start_address;
                        while (!(delta & 1023) && unit[1]) {
                                delta >>= 10;
                                unit++;
@@ -331,7 +334,7 @@ static void note_page(struct pg_state *st, pgprot_t new_prot,
                 * such as the start of vmalloc space etc.
                 * This helps in the interpretation.
                 */
-               if (st->current_address >= st->marker[1].start_address) {
+               if (addr >= st->marker[1].start_address) {
                        if (st->marker->max_lines &&
                            st->lines > st->marker->max_lines) {
                                unsigned long nskip =
@@ -347,228 +350,42 @@ static void note_page(struct pg_state *st, pgprot_t new_prot,
                                           st->marker->name);
                }
 
-               st->start_address = st->current_address;
+               st->start_address = addr;
                st->current_prot = new_prot;
                st->effective_prot = new_eff;
                st->level = level;
        }
 }
 
-static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2)
-{
-       return (prot1 & prot2 & (_PAGE_USER | _PAGE_RW)) |
-              ((prot1 | prot2) & _PAGE_NX);
-}
-
-static int ptdump_pte_entry(pte_t *pte, unsigned long addr,
-                           unsigned long next, struct mm_walk *walk)
-{
-       struct pg_state *st = walk->private;
-       pgprotval_t eff, prot;
-
-       st->current_address = normalize_addr(addr);
-
-       prot = pte_flags(*pte);
-       eff = effective_prot(st->effective_prot_pmd, prot);
-       note_page(st, __pgprot(prot), eff, 5);
-
-       return 0;
-}
-
-#ifdef CONFIG_KASAN
-
-/*
- * This is an optimization for KASAN=y case. Since all kasan page tables
- * eventually point to the kasan_early_shadow_page we could call note_page()
- * right away without walking through lower level page tables. This saves
- * us dozens of seconds (minutes for 5-level config) while checking for
- * W+X mapping or reading kernel_page_tables debugfs file.
- */
-static inline bool kasan_page_table(struct pg_state *st, void *pt)
-{
-       if (__pa(pt) == __pa(kasan_early_shadow_pmd) ||
-           (pgtable_l5_enabled() &&
-                       __pa(pt) == __pa(kasan_early_shadow_p4d)) ||
-           __pa(pt) == __pa(kasan_early_shadow_pud)) {
-               pgprotval_t prot = pte_flags(kasan_early_shadow_pte[0]);
-               note_page(st, __pgprot(prot), 0, 5);
-               return true;
-       }
-       return false;
-}
-#else
-static inline bool kasan_page_table(struct pg_state *st, void *pt)
-{
-       return false;
-}
-#endif
-
-static int ptdump_test_pmd(unsigned long addr, unsigned long next,
-                          pmd_t *pmd, struct mm_walk *walk)
-{
-       struct pg_state *st = walk->private;
-
-       st->current_address = normalize_addr(addr);
-
-       if (kasan_page_table(st, pmd))
-               return 1;
-       return 0;
-}
-
-static int ptdump_pmd_entry(pmd_t *pmd, unsigned long addr,
-                           unsigned long next, struct mm_walk *walk)
-{
-       struct pg_state *st = walk->private;
-       pgprotval_t eff, prot;
-
-       prot = pmd_flags(*pmd);
-       eff = effective_prot(st->effective_prot_pud, prot);
-
-       st->current_address = normalize_addr(addr);
-
-       if (pmd_large(*pmd))
-               note_page(st, __pgprot(prot), eff, 4);
-
-       st->effective_prot_pmd = eff;
-
-       return 0;
-}
-
-static int ptdump_test_pud(unsigned long addr, unsigned long next,
-                          pud_t *pud, struct mm_walk *walk)
-{
-       struct pg_state *st = walk->private;
-
-       st->current_address = normalize_addr(addr);
-
-       if (kasan_page_table(st, pud))
-               return 1;
-       return 0;
-}
-
-static int ptdump_pud_entry(pud_t *pud, unsigned long addr,
-                           unsigned long next, struct mm_walk *walk)
-{
-       struct pg_state *st = walk->private;
-       pgprotval_t eff, prot;
-
-       prot = pud_flags(*pud);
-       eff = effective_prot(st->effective_prot_p4d, prot);
-
-       st->current_address = normalize_addr(addr);
-
-       if (pud_large(*pud))
-               note_page(st, __pgprot(prot), eff, 3);
-
-       st->effective_prot_pud = eff;
-
-       return 0;
-}
-
-static int ptdump_test_p4d(unsigned long addr, unsigned long next,
-                          p4d_t *p4d, struct mm_walk *walk)
-{
-       struct pg_state *st = walk->private;
-
-       st->current_address = normalize_addr(addr);
-
-       if (kasan_page_table(st, p4d))
-               return 1;
-       return 0;
-}
-
-static int ptdump_p4d_entry(p4d_t *p4d, unsigned long addr,
-                           unsigned long next, struct mm_walk *walk)
-{
-       struct pg_state *st = walk->private;
-       pgprotval_t eff, prot;
-
-       prot = p4d_flags(*p4d);
-       eff = effective_prot(st->effective_prot_pgd, prot);
-
-       st->current_address = normalize_addr(addr);
-
-       if (p4d_large(*p4d))
-               note_page(st, __pgprot(prot), eff, 2);
-
-       st->effective_prot_p4d = eff;
-
-       return 0;
-}
-
-static int ptdump_pgd_entry(pgd_t *pgd, unsigned long addr,
-                           unsigned long next, struct mm_walk *walk)
-{
-       struct pg_state *st = walk->private;
-       pgprotval_t eff, prot;
+static const struct ptdump_range ptdump_ranges[] = {
+#ifdef CONFIG_X86_64
 
-       prot = pgd_flags(*pgd);
+#define normalize_addr_shift (64 - (__VIRTUAL_MASK_SHIFT + 1))
+#define normalize_addr(u) ((signed long)(u << normalize_addr_shift) >> normalize_addr_shift)
 
-#ifdef CONFIG_X86_PAE
-       eff = _PAGE_USER | _PAGE_RW;
+       {0, PTRS_PER_PGD * PGD_LEVEL_MULT / 2},
+       {normalize_addr(PTRS_PER_PGD * PGD_LEVEL_MULT / 2), ~0UL},
 #else
-       eff = prot;
+       {0, ~0UL},
 #endif
-
-       st->current_address = normalize_addr(addr);
-
-       if (pgd_large(*pgd))
-               note_page(st, __pgprot(prot), eff, 1);
-
-       st->effective_prot_pgd = eff;
-
-       return 0;
-}
-
-static int ptdump_hole(unsigned long addr, unsigned long next,
-                      struct mm_walk *walk)
-{
-       struct pg_state *st = walk->private;
-
-       st->current_address = normalize_addr(addr);
-
-       note_page(st, __pgprot(0), 0, -1);
-
-       return 0;
-}
+       {0, 0}
+};
 
 static void ptdump_walk_pgd_level_core(struct seq_file *m, struct mm_struct *mm,
                                       bool checkwx, bool dmesg)
 {
-       struct pg_state st = {};
-       struct mm_walk walk = {
-               .mm             = mm,
-               .pgd_entry      = ptdump_pgd_entry,
-               .p4d_entry      = ptdump_p4d_entry,
-               .pud_entry      = ptdump_pud_entry,
-               .pmd_entry      = ptdump_pmd_entry,
-               .pte_entry      = ptdump_pte_entry,
-               .test_p4d       = ptdump_test_p4d,
-               .test_pud       = ptdump_test_pud,
-               .test_pmd       = ptdump_test_pmd,
-               .pte_hole       = ptdump_hole,
-               .private        = &st
+       struct pg_state st = {
+               .ptdump = {
+                       .note_page      = note_page,
+                       .range          = ptdump_ranges
+               },
+               .to_dmesg       = dmesg,
+               .check_wx       = checkwx,
+               .seq            = m
        };
 
-       st.to_dmesg = dmesg;
-       st.check_wx = checkwx;
-       st.seq = m;
-       if (checkwx)
-               st.wx_pages = 0;
-
-       down_read(&mm->mmap_sem);
-#ifdef CONFIG_X86_64
-       walk_page_range(0, PTRS_PER_PGD*PGD_LEVEL_MULT/2, &walk);
-       walk_page_range(normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT/2), ~0,
-                       &walk);
-#else
-       walk_page_range(0, ~0, &walk);
-#endif
-       up_read(&mm->mmap_sem);
+       ptdump_walk_pgd(&st.ptdump, mm);
 
-       /* Flush out the last page */
-       st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT);
-       note_page(&st, __pgprot(0), 0, 0);
        if (!checkwx)
                return;
        if (st.wx_pages)
-- 
2.20.1

Reply via email to