Author: markj
Date: Mon Sep  2 21:57:57 2019
New Revision: 351728
URL: https://svnweb.freebsd.org/changeset/base/351728

Log:
  Add a sysctl to dump kernel mappings and their properties on amd64.
  
  The sysctl is called vm.pmap.kernel_maps.  It dumps address ranges
  and their corresponding protection and mapping mode, as well as
  counts of 2MB and 1GB pages in the range.
  
  Reviewed by:  kib
  MFC after:    2 weeks
  Sponsored by: Netflix
  Differential Revision:        https://reviews.freebsd.org/D21380
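
For illustration, the new node can be read with sysctl(8), e.g.
"sysctl vm.pmap.kernel_maps", or from a program via sysctlbyname(3).  A
minimal userland sketch of the latter, assuming only the sysctl name added
by this change, is below; each output line carries the address range, its
r/w/x/u-or-s/g bits, the PAT mode (WB/WT/UC/WP/WC), and the counts of 1GB,
2MB and 4KB pages in the range.

    #include <sys/types.h>
    #include <sys/sysctl.h>

    #include <err.h>
    #include <stdio.h>
    #include <stdlib.h>

    int
    main(void)
    {
            char *buf;
            size_t len;

            /* First call with a NULL buffer to learn the required size. */
            if (sysctlbyname("vm.pmap.kernel_maps", NULL, &len, NULL, 0) != 0)
                    err(1, "sysctlbyname");
            if ((buf = malloc(len + 1)) == NULL)
                    err(1, "malloc");
            /*
             * Fetch the text itself.  The kernel mappings can change
             * between the two calls, so a real tool would retry on ENOMEM.
             */
            if (sysctlbyname("vm.pmap.kernel_maps", buf, &len, NULL, 0) != 0)
                    err(1, "sysctlbyname");
            buf[len] = '\0';
            fputs(buf, stdout);
            free(buf);
            return (0);
    }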

Modified:
  head/sys/amd64/amd64/pmap.c

Modified: head/sys/amd64/amd64/pmap.c
==============================================================================
--- head/sys/amd64/amd64/pmap.c Mon Sep  2 21:54:08 2019        (r351727)
+++ head/sys/amd64/amd64/pmap.c Mon Sep  2 21:57:57 2019        (r351728)
@@ -124,6 +124,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/proc.h>
 #include <sys/rangeset.h>
 #include <sys/rwlock.h>
+#include <sys/sbuf.h>
 #include <sys/sx.h>
 #include <sys/turnstile.h>
 #include <sys/vmem.h>
@@ -2112,6 +2113,41 @@ pmap_cache_mask(pmap_t pmap, boolean_t is_pde)
        return (mask);
 }
 
+static int
+pmap_pat_index(pmap_t pmap, pt_entry_t pte, bool is_pde)
+{
+       int pat_flag, pat_idx;
+
+       pat_idx = 0;
+       switch (pmap->pm_type) {
+       case PT_X86:
+       case PT_RVI:
+               /* The PAT bit is different for PTE's and PDE's. */
+               pat_flag = is_pde ? X86_PG_PDE_PAT : X86_PG_PTE_PAT;
+
+               if ((pte & pat_flag) != 0)
+                       pat_idx |= 0x4;
+               if ((pte & PG_NC_PCD) != 0)
+                       pat_idx |= 0x2;
+               if ((pte & PG_NC_PWT) != 0)
+                       pat_idx |= 0x1;
+               break;
+       case PT_EPT:
+               if ((pte & EPT_PG_IGNORE_PAT) != 0)
+                       panic("EPT PTE %#lx has no PAT memory type", pte);
+               pat_idx = (pte & EPT_PG_MEMORY_TYPE(0x7)) >> 3;
+               break;
+       }
+
+       /* See pmap_init_pat(). */
+       if (pat_idx == 4)
+               pat_idx = 0;
+       if (pat_idx == 7)
+               pat_idx = 3;
+
+       return (pat_idx);
+}
+
 bool
 pmap_ps_enabled(pmap_t pmap)
 {
@@ -9980,6 +10016,268 @@ pmap_pkru_clear(pmap_t pmap, vm_offset_t sva, vm_offse
        }
        return (error);
 }
+
+/*
+ * Track a range of the kernel's virtual address space that is contiguous
+ * in various mapping attributes.
+ */
+struct pmap_kernel_map_range {
+       vm_offset_t sva;
+       pt_entry_t attrs;
+       int ptes;
+       int pdes;
+       int pdpes;
+};
+
+static void
+sysctl_kmaps_dump(struct sbuf *sb, struct pmap_kernel_map_range *range,
+    vm_offset_t eva)
+{
+       const char *mode;
+       int i, pat_idx;
+
+       if (eva <= range->sva)
+               return;
+
+       pat_idx = pmap_pat_index(kernel_pmap, range->attrs, true);
+       for (i = 0; i < PAT_INDEX_SIZE; i++)
+               if (pat_index[i] == pat_idx)
+                       break;
+
+       switch (i) {
+       case PAT_WRITE_BACK:
+               mode = "WB";
+               break;
+       case PAT_WRITE_THROUGH:
+               mode = "WT";
+               break;
+       case PAT_UNCACHEABLE:
+               mode = "UC";
+               break;
+       case PAT_WRITE_PROTECTED:
+               mode = "WP";
+               break;
+       case PAT_WRITE_COMBINING:
+               mode = "WC";
+               break;
+       default:
+               printf("%s: unknown PAT mode %#x for range %#016lx-%#016lx\n",
+                   __func__, i, range->sva, eva);
+               mode = "??";
+               break;
+       }
+
+       sbuf_printf(sb, "%#016lx-%#016lx r%c%c%c%c %s %d %d %d\n",
+           range->sva, eva,
+           (range->attrs & X86_PG_RW) != 0 ? 'w' : '-',
+           (range->attrs & pg_nx) != 0 ? '-' : 'x',
+           (range->attrs & X86_PG_U) != 0 ? 'u' : 's',
+           (range->attrs & X86_PG_G) != 0 ? 'g' : '-',
+           mode, range->pdpes, range->pdes, range->ptes);
+
+       /* Reset to sentinel value. */
+       range->sva = KVADDR(NPML4EPG - 1, NPDPEPG - 1, NPDEPG - 1, NPTEPG - 1);
+}
+
+/*
+ * Determine whether the attributes specified by a page table entry match those
+ * being tracked by the current range.  This is not quite as simple as a direct
+ * flag comparison since some PAT modes have multiple representations.
+ */
+static bool
+sysctl_kmaps_match(struct pmap_kernel_map_range *range, pt_entry_t attrs)
+{
+       pt_entry_t diff, mask;
+
+       mask = X86_PG_G | X86_PG_RW | X86_PG_U | X86_PG_PDE_CACHE | pg_nx;
+       diff = (range->attrs ^ attrs) & mask;
+       if (diff == 0)
+               return (true);
+       if ((diff & ~X86_PG_PDE_PAT) == 0 &&
+           pmap_pat_index(kernel_pmap, range->attrs, true) ==
+           pmap_pat_index(kernel_pmap, attrs, true))
+               return (true);
+       return (false);
+}
+
+static void
+sysctl_kmaps_reinit(struct pmap_kernel_map_range *range, vm_offset_t va,
+    pt_entry_t attrs)
+{
+
+       memset(range, 0, sizeof(*range));
+       range->sva = va;
+       range->attrs = attrs;
+}
+
+/*
+ * Given a leaf PTE, derive the mapping's attributes.  If they do not match
+ * those of the current run, dump the address range and its attributes, and
+ * begin a new run.
+ */
+static void
+sysctl_kmaps_check(struct sbuf *sb, struct pmap_kernel_map_range *range,
+    vm_offset_t va, pml4_entry_t pml4e, pdp_entry_t pdpe, pd_entry_t pde,
+    pt_entry_t pte)
+{
+       pt_entry_t attrs;
+
+       attrs = pml4e & (X86_PG_RW | X86_PG_U | pg_nx);
+
+       attrs |= pdpe & pg_nx;
+       attrs &= pg_nx | (pdpe & (X86_PG_RW | X86_PG_U));
+       if ((pdpe & PG_PS) != 0) {
+               attrs |= pdpe & (X86_PG_G | X86_PG_PDE_CACHE);
+       } else if (pde != 0) {
+               attrs |= pde & pg_nx;
+               attrs &= pg_nx | (pde & (X86_PG_RW | X86_PG_U));
+       }
+       if ((pde & PG_PS) != 0) {
+               attrs |= pde & (X86_PG_G | X86_PG_PDE_CACHE);
+       } else if (pte != 0) {
+               attrs |= pte & pg_nx;
+               attrs &= pg_nx | (pte & (X86_PG_RW | X86_PG_U));
+               attrs |= pte & (X86_PG_G | X86_PG_PTE_CACHE);
+
+               /* Canonicalize by always using the PDE PAT bit. */
+               if ((attrs & X86_PG_PTE_PAT) != 0)
+                       attrs ^= X86_PG_PDE_PAT | X86_PG_PTE_PAT;
+       }
+
+       if (range->sva > va || !sysctl_kmaps_match(range, attrs)) {
+               sysctl_kmaps_dump(sb, range, va);
+               sysctl_kmaps_reinit(range, va, attrs);
+       }
+}
+
+static int
+sysctl_kmaps(SYSCTL_HANDLER_ARGS)
+{
+       struct pmap_kernel_map_range range;
+       struct sbuf sbuf, *sb;
+       pml4_entry_t pml4e;
+       pdp_entry_t *pdp, pdpe;
+       pd_entry_t *pd, pde;
+       pt_entry_t *pt, pte;
+       vm_offset_t sva;
+       vm_paddr_t pa;
+       int error, i, j, k, l;
+
+       error = sysctl_wire_old_buffer(req, 0);
+       if (error != 0)
+               return (error);
+       sb = &sbuf;
+       sbuf_new_for_sysctl(sb, NULL, PAGE_SIZE, req);
+
+       /* Sentinel value. */
+       range.sva = KVADDR(NPML4EPG - 1, NPDPEPG - 1, NPDEPG - 1, NPTEPG - 1);
+
+       /*
+        * Iterate over the kernel page tables without holding the kernel pmap
+        * lock.  Outside of the large map, kernel page table pages are never
+        * freed, so at worst we will observe inconsistencies in the output.
+        * Within the large map, ensure that PDP and PD page addresses are
+        * valid before descending.
+        */
+       for (sva = 0, i = pmap_pml4e_index(sva); i < NPML4EPG; i++) {
+               switch (i) {
+               case PML4PML4I:
+                       sbuf_printf(sb, "\nRecursive map:\n");
+                       break;
+               case DMPML4I:
+                       sbuf_printf(sb, "\nDirect map:\n");
+                       break;
+               case KPML4BASE:
+                       sbuf_printf(sb, "\nKernel map:\n");
+                       break;
+               case LMSPML4I:
+                       sbuf_printf(sb, "\nLarge map:\n");
+                       break;
+               }
+
+               /* Convert to canonical form. */
+               if (sva == 1ul << 47)
+                       sva |= -1ul << 48;
+
+restart:
+               pml4e = kernel_pmap->pm_pml4[i];
+               if ((pml4e & X86_PG_V) == 0) {
+                       sva = rounddown2(sva, NBPML4);
+                       sysctl_kmaps_dump(sb, &range, sva);
+                       sva += NBPML4;
+                       continue;
+               }
+               pa = pml4e & PG_FRAME;
+               pdp = (pdp_entry_t *)PHYS_TO_DMAP(pa);
+
+               for (j = pmap_pdpe_index(sva); j < NPDPEPG; j++) {
+                       pdpe = pdp[j];
+                       if ((pdpe & X86_PG_V) == 0) {
+                               sva = rounddown2(sva, NBPDP);
+                               sysctl_kmaps_dump(sb, &range, sva);
+                               sva += NBPDP;
+                               continue;
+                       }
+                       pa = pdpe & PG_FRAME;
+                       if (PMAP_ADDRESS_IN_LARGEMAP(sva) &&
+                           vm_phys_paddr_to_vm_page(pa) == NULL)
+                               goto restart;
+                       if ((pdpe & PG_PS) != 0) {
+                               sva = rounddown2(sva, NBPDP);
+                               sysctl_kmaps_check(sb, &range, sva, pml4e, pdpe,
+                                   0, 0);
+                               range.pdpes++;
+                               sva += NBPDP;
+                               continue;
+                       }
+                       pd = (pd_entry_t *)PHYS_TO_DMAP(pa);
+
+                       for (k = pmap_pde_index(sva); k < NPDEPG; k++) {
+                               pde = pd[k];
+                               if ((pde & X86_PG_V) == 0) {
+                                       sva = rounddown2(sva, NBPDR);
+                                       sysctl_kmaps_dump(sb, &range, sva);
+                                       sva += NBPDR;
+                                       continue;
+                               }
+                               pa = pde & PG_FRAME;
+                               if (PMAP_ADDRESS_IN_LARGEMAP(sva) &&
+                                   vm_phys_paddr_to_vm_page(pa) == NULL)
+                                       goto restart;
+                               if ((pde & PG_PS) != 0) {
+                                       sva = rounddown2(sva, NBPDR);
+                                       sysctl_kmaps_check(sb, &range, sva,
+                                           pml4e, pdpe, pde, 0);
+                                       range.pdes++;
+                                       sva += NBPDR;
+                                       continue;
+                               }
+                               pt = (pt_entry_t *)PHYS_TO_DMAP(pa);
+
+                               for (l = pmap_pte_index(sva); l < NPTEPG; l++,
+                                   sva += PAGE_SIZE) {
+                                       pte = pt[l];
+                                       if ((pte & X86_PG_V) == 0) {
+                                               sysctl_kmaps_dump(sb, &range,
+                                                   sva);
+                                               continue;
+                                       }
+                                       sysctl_kmaps_check(sb, &range, sva,
+                                           pml4e, pdpe, pde, pte);
+                                       range.ptes++;
+                               }
+                       }
+               }
+       }
+
+       error = sbuf_finish(sb);
+       sbuf_delete(sb);
+       return (error);
+}
+SYSCTL_OID(_vm_pmap, OID_AUTO, kernel_maps,
+    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
+    NULL, 0, sysctl_kmaps, "A",
+    "Dump kernel address layout");
 
 #ifdef DDB
 DB_SHOW_COMMAND(pte, pmap_print_pte)