The branch main has been updated by andrew:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=36f1526a598c373ca660910c9772d28a61383c3b

commit 36f1526a598c373ca660910c9772d28a61383c3b
Author:     Andrew Turner <[email protected]>
AuthorDate: 2022-03-23 17:39:58 +0000
Commit:     Andrew Turner <[email protected]>
CommitDate: 2022-07-19 09:57:03 +0000

    Add experimental 16k page support on arm64
    
    Add initial 16k page support on arm64. It is considered experimental,
    with no guarantee of compatibility with a userspace or kernel modules
    built with the current 4k page size as code will likely try to pass
    in a too small size when working with APIs that take a multiple of a
    page, e.g. mmap.
    
    As this is experimental, and because userspace and the kernel need to
    have the PAGE_SIZE macro kept in sync there is no kernel option to
    enable this. To test, a new image should be built with the
    PAGE_{SIZE,SHIFT,MASK} macros changed to the 16k versions.
    
    There are currently known issues with loading modules from an old
    loader as it can misalign them to load on a non-16k boundary.
    
    Testing has shown good results in kernel workloads that allocate and
    free large amounts of memory as only a quarter of the number of calls
    into the VM subsystem are needed in the best case.
    
    Reviewed by:    markj
    Tested by:      gallatin
    Sponsored by:   The FreeBSD Foundation
    Differential Revision: https://reviews.freebsd.org/D34793
---
 sys/arm64/arm64/locore.S           | 136 ++++++++++++++++++++++++++++++++--
 sys/arm64/arm64/minidump_machdep.c |   6 ++
 sys/arm64/arm64/pmap.c             | 146 +++++++++++++++++++++++++------------
 sys/arm64/include/pmap.h           |  11 +++
 sys/arm64/include/pte.h            |  42 ++++++++---
 5 files changed, 277 insertions(+), 64 deletions(-)

diff --git a/sys/arm64/arm64/locore.S b/sys/arm64/arm64/locore.S
index ba85bb4e46b2..518c6c812aa9 100644
--- a/sys/arm64/arm64/locore.S
+++ b/sys/arm64/arm64/locore.S
@@ -39,6 +39,14 @@
 
 #define        VIRT_BITS       48
 
+#if PAGE_SIZE == PAGE_SIZE_16K
+/*
+ * The number of level 3 tables to create. 32 will allow for 1G of address
+ * space, the same as a single level 2 table with 4k pages.
+ */
+#define        L3_PAGE_COUNT   32
+#endif
+
        .globl  kernbase
        .set    kernbase, KERNBASE
 
@@ -431,8 +439,13 @@ LENTRY(create_pagetables)
        /* Booted with modules pointer */
        /* Find modulep - begin */
        sub     x8, x0, x6
-       /* Add two 2MiB pages for the module data and round up */
-       ldr     x7, =(3 * L2_SIZE - 1)
+       /*
+        * Add space for the module data. When PAGE_SIZE is 4k this will
+        * add at least 2 level 2 blocks (2 * 2MiB). When PAGE_SIZE is
+        * larger the added space is at least as large, because the
+        * mapping is then built from smaller level 3 pages.
+        */
+       ldr     x7, =((6 * 1024 * 1024) - 1)
        add     x8, x8, x7
        b       common
 
@@ -457,6 +470,34 @@ booti_no_fdt:
 #endif
 
 common:
+#if PAGE_SIZE != PAGE_SIZE_4K
+       /*
+        * Create L3 pages. The kernel will be loaded at a 2M aligned
+        * address. When the page size is larger than 4k an L2 block is
+        * larger than 2M (32M with 16k pages), so L2 blocks cannot map a
+        * kernel that is only 2M aligned. Map it with L3 pages instead,
+        * as 2M is a multiple of the L3 page size.
+        */
+
+       /* Get the number of l3 pages to allocate, rounded down */
+       lsr     x10, x8, #(L3_SHIFT)
+
+       /* Create the kernel space L3 tables */
+       mov     x6, x26
+       mov     x7, #(ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK))
+       mov     x8, #(KERNBASE)
+       mov     x9, x28
+       bl      build_l3_page_pagetable
+
+       /* Move to the l2 table */
+       ldr     x9, =(PAGE_SIZE * L3_PAGE_COUNT)
+       add     x26, x26, x9
+
+       /* Link the l2 -> l3 table */
+       mov     x9, x6
+       mov     x6, x26
+       bl      link_l2_pagetable
+#else
        /* Get the number of l2 pages to allocate, rounded down */
        lsr     x10, x8, #(L2_SHIFT)
 
@@ -466,6 +507,7 @@ common:
        mov     x8, #(KERNBASE)
        mov     x9, x28
        bl      build_l2_block_pagetable
+#endif
 
        /* Move to the l1 table */
        add     x26, x26, #PAGE_SIZE
@@ -504,7 +546,8 @@ common:
 #if defined(SOCDEV_PA)
        /* Create a table for the UART */
        mov     x7, #(ATTR_S1_nG | ATTR_S1_IDX(VM_MEMATTR_DEVICE))
-       add     x16, x16, #(L2_SIZE)    /* VA start */
+       ldr     x9, =(L2_SIZE)
+       add     x16, x16, x9    /* VA start */
        mov     x8, x16
 
        /* Store the socdev virtual address */
@@ -523,7 +566,8 @@ common:
 
        /* Create the mapping for FDT data (2 MiB max) */
        mov     x7, #(ATTR_S1_nG | ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK))
-       add     x16, x16, #(L2_SIZE)    /* VA start */
+       ldr     x9, =(L2_SIZE)
+       add     x16, x16, x9    /* VA start */
        mov     x8, x16
        mov     x9, x0                  /* PA start */
        /* Update the module pointer to point at the allocated memory */
@@ -662,6 +706,76 @@ LENTRY(build_l2_block_pagetable)
        ret
 LEND(build_l2_block_pagetable)
 
+#if PAGE_SIZE != PAGE_SIZE_4K
+/*
+ * Writes one L2 table descriptor pointing at an L3 table
+ *
+ *  x6  = L2 table
+ *  x8  = Virtual Address, selects the L2 slot that is written
+ *  x9  = L3 PA (trashed), low PAGE_SHIFT bits are dropped
+ *  x11, x12 and x13 are trashed
+ */
+LENTRY(link_l2_pagetable)
+       /*
+        * Link an L2 -> L3 table entry. Only a single entry is stored.
+        */
+       /* Find the L2 table index for the virtual address */
+       lsr     x11, x8, #L2_SHIFT
+       and     x11, x11, #Ln_ADDR_MASK
+
+       /* Build the L2 table descriptor */
+       mov     x12, #L2_TABLE
+
+       /* Only use the output address bits: clear the low PAGE_SHIFT bits */
+       lsr     x9, x9, #PAGE_SHIFT
+       orr     x13, x12, x9, lsl #PAGE_SHIFT
+
+       /* Store the 8-byte entry at index x11 of the L2 table */
+       str     x13, [x6, x11, lsl #3]
+
+       ret
+LEND(link_l2_pagetable)
+
+/*
+ * Builds count consecutive level 3 page entries, one L3_SIZE page each
+ *  x6  = L3 table
+ *  x7  = Page attributes, OR-ed into every entry
+ *  x8  = VA start, only used to find the first table index
+ *  x9  = PA start (trashed), low L3_SHIFT bits are dropped
+ *  x10 = Entry count (trashed), must be non-zero: tested after decrement
+ *  x11, x12 and x13 are trashed
+ */
+LENTRY(build_l3_page_pagetable)
+       /*
+        * Build the L3 table entries; the index is not re-masked in the loop.
+        */
+       /* Find the table index of the first entry */
+       lsr     x11, x8, #L3_SHIFT
+       and     x11, x11, #Ln_ADDR_MASK
+
+       /* Build the attribute bits shared by every L3 page entry */
+       orr     x12, x7, #L3_PAGE
+       orr     x12, x12, #(ATTR_DEFAULT)
+       orr     x12, x12, #(ATTR_S1_UXN)
+
+       /* Only use the output address bits: x9 becomes a page frame number */
+       lsr     x9, x9, #L3_SHIFT
+
+       /* Combine the attributes with the frame number shifted back in place */
+1:     orr     x13, x12, x9, lsl #L3_SHIFT
+
+       /* Store the 8-byte entry at index x11 */
+       str     x13, [x6, x11, lsl #3]
+
+       sub     x10, x10, #1
+       add     x11, x11, #1
+       add     x9, x9, #1
+       cbnz    x10, 1b
+
+       ret
+LEND(build_l3_page_pagetable)
+#endif
+
 LENTRY(start_mmu)
        dsb     sy
 
@@ -743,7 +857,15 @@ mair:
                MAIR_ATTR(MAIR_NORMAL_WT, VM_MEMATTR_WRITE_THROUGH) |   \
                MAIR_ATTR(MAIR_DEVICE_nGnRE, VM_MEMATTR_DEVICE_nGnRE)
 tcr:
-       .quad (TCR_TxSZ(64 - VIRT_BITS) | TCR_TG1_4K | TCR_TG0_4K | \
+#if PAGE_SIZE == PAGE_SIZE_4K
+#define        TCR_TG  (TCR_TG1_4K | TCR_TG0_4K)
+#elif PAGE_SIZE == PAGE_SIZE_16K
+#define        TCR_TG  (TCR_TG1_16K | TCR_TG0_16K)
+#else
+#error Unsupported page size
+#endif
+
+       .quad (TCR_TxSZ(64 - VIRT_BITS) | TCR_TG | \
            TCR_CACHE_ATTRS | TCR_SMP_ATTRS)
 sctlr_set:
        /* Bits to set */
@@ -774,6 +896,10 @@ END(abort)
         */
        .globl pagetable_l0_ttbr1
 pagetable:
+#if PAGE_SIZE != PAGE_SIZE_4K
+       .space  (PAGE_SIZE * L3_PAGE_COUNT)
+pagetable_l2_ttbr1:
+#endif
        .space  PAGE_SIZE
 pagetable_l1_ttbr1:
        .space  PAGE_SIZE
diff --git a/sys/arm64/arm64/minidump_machdep.c 
b/sys/arm64/arm64/minidump_machdep.c
index 3dfeb3dfef1e..ee2b1be9b0b1 100644
--- a/sys/arm64/arm64/minidump_machdep.c
+++ b/sys/arm64/arm64/minidump_machdep.c
@@ -239,7 +239,13 @@ cpu_minidumpsys(struct dumperinfo *di, const struct 
minidumpstate *state)
        mdhdr.dmapbase = DMAP_MIN_ADDRESS;
        mdhdr.dmapend = DMAP_MAX_ADDRESS;
        mdhdr.dumpavailsize = round_page(sizeof(dump_avail));
+#if PAGE_SIZE == PAGE_SIZE_4K
        mdhdr.flags = MINIDUMP_FLAG_PS_4K;
+#elif PAGE_SIZE == PAGE_SIZE_16K
+       mdhdr.flags = MINIDUMP_FLAG_PS_16K;
+#else
+#error Unsupported page size
+#endif
 
        dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_AARCH64_VERSION,
            dumpsize);
diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 3b37ce214664..d95eccb445a5 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -286,10 +286,6 @@ vm_paddr_t dmap_phys_base; /* The start of the dmap region 
*/
 vm_paddr_t dmap_phys_max;      /* The limit of the dmap region */
 vm_offset_t dmap_max_addr;     /* The virtual address limit of the dmap */
 
-/* This code assumes all L1 DMAP entries will be used */
-CTASSERT((DMAP_MIN_ADDRESS  & ~L0_OFFSET) == DMAP_MIN_ADDRESS);
-CTASSERT((DMAP_MAX_ADDRESS  & ~L0_OFFSET) == DMAP_MAX_ADDRESS);
-
 extern pt_entry_t pagetable_l0_ttbr1[];
 
 #define        PHYSMAP_SIZE    (2 * (VM_PHYSSEG_MAX - 1))
@@ -299,6 +295,15 @@ static u_int physmap_idx;
 static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
     "VM/pmap parameters");
 
+#if PAGE_SIZE == PAGE_SIZE_4K
+#define        L1_BLOCKS_SUPPORTED     1
+#else
+/* TODO: Make this dynamic when we support FEAT_LPA2 (TCR_EL1.DS == 1) */
+#define        L1_BLOCKS_SUPPORTED     0
+#endif
+
+#define        PMAP_ASSERT_L1_BLOCKS_SUPPORTED MPASS(L1_BLOCKS_SUPPORTED)
+
 /*
  * This ASID allocator uses a bit vector ("asid_set") to remember which ASIDs
  * that it has currently allocated to a pmap, a cursor ("asid_next") to
@@ -571,6 +576,7 @@ pmap_pte(pmap_t pmap, vm_offset_t va, int *level)
        }
        desc = pmap_load(l1) & ATTR_DESCR_MASK;
        if (desc == L1_BLOCK) {
+               PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
                *level = 1;
                return (l1);
        }
@@ -621,9 +627,11 @@ pmap_pte_exists(pmap_t pmap, vm_offset_t va, int level, 
const char *diag)
        if (desc == L0_TABLE && level > 0) {
                l1p = pmap_l0_to_l1(l0p, va);
                desc = pmap_load(l1p) & ATTR_DESCR_MASK;
-               if (desc == L1_BLOCK && level == 1)
+               if (desc == L1_BLOCK && level == 1) {
+                       PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
                        return (l1p);
-               else if (desc == L1_TABLE && level > 1) {
+               }
+               if (desc == L1_TABLE && level > 1) {
                        l2p = pmap_l1_to_l2(l1p, va);
                        desc = pmap_load(l2p) & ATTR_DESCR_MASK;
                        if (desc == L2_BLOCK && level == 2)
@@ -673,6 +681,7 @@ pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t 
**l0, pd_entry_t **l1,
        *l1 = l1p;
 
        if ((pmap_load(l1p) & ATTR_DESCR_MASK) == L1_BLOCK) {
+               PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
                *l2 = NULL;
                *l3 = NULL;
                return (true);
@@ -1013,29 +1022,36 @@ pmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t 
min_pa,
                        pmap_bootstrap_dmap_l3_page(&state, i);
                MPASS(state.pa <= physmap[i + 1]);
 
-               /* Create L2 mappings at the start of the region */
-               if ((state.pa & L1_OFFSET) != 0)
-                       pmap_bootstrap_dmap_l2_block(&state, i);
-               MPASS(state.pa <= physmap[i + 1]);
+               if (L1_BLOCKS_SUPPORTED) {
+                       /* Create L2 mappings at the start of the region */
+                       if ((state.pa & L1_OFFSET) != 0)
+                               pmap_bootstrap_dmap_l2_block(&state, i);
+                       MPASS(state.pa <= physmap[i + 1]);
+
+                       /* Create the main L1 block mappings */
+                       for (; state.va < DMAP_MAX_ADDRESS &&
+                           (physmap[i + 1] - state.pa) >= L1_SIZE;
+                           state.va += L1_SIZE, state.pa += L1_SIZE) {
+                               /* Make sure there is a valid L1 table */
+                               pmap_bootstrap_dmap_l0_table(&state);
+                               MPASS((state.pa & L1_OFFSET) == 0);
+                               pmap_store(&state.l1[pmap_l1_index(state.va)],
+                                   state.pa | ATTR_DEFAULT | ATTR_S1_XN |
+                                   ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK) |
+                                   L1_BLOCK);
+                       }
+                       MPASS(state.pa <= physmap[i + 1]);
 
-               /* Create the main L1 block mappings */
-               for (; state.va < DMAP_MAX_ADDRESS &&
-                   (physmap[i + 1] - state.pa) >= L1_SIZE;
-                   state.va += L1_SIZE, state.pa += L1_SIZE) {
-                       /* Make sure there is a valid L1 table */
-                       pmap_bootstrap_dmap_l0_table(&state);
-                       MPASS((state.pa & L1_OFFSET) == 0);
-                       pmap_store(&state.l1[pmap_l1_index(state.va)],
-                           state.pa | ATTR_DEFAULT | ATTR_S1_XN |
-                           ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK) |
-                           L1_BLOCK);
+                       /* Create L2 mappings at the end of the region */
+                       pmap_bootstrap_dmap_l2_block(&state, i);
+               } else {
+                       while (state.va < DMAP_MAX_ADDRESS &&
+                           (physmap[i + 1] - state.pa) >= L2_SIZE) {
+                               pmap_bootstrap_dmap_l2_block(&state, i);
+                       }
                }
                MPASS(state.pa <= physmap[i + 1]);
 
-               /* Create L2 mappings at the end of the region */
-               pmap_bootstrap_dmap_l2_block(&state, i);
-               MPASS(state.pa <= physmap[i + 1]);
-
                /* Create L3 mappings at the end of the region */
                pmap_bootstrap_dmap_l3_page(&state, i);
                MPASS(state.pa == physmap[i + 1]);
@@ -1261,9 +1277,11 @@ pmap_init(void)
                KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0,
                    ("pmap_init: can't assign to pagesizes[1]"));
                pagesizes[1] = L2_SIZE;
-               KASSERT(MAXPAGESIZES > 2 && pagesizes[2] == 0,
-                   ("pmap_init: can't assign to pagesizes[2]"));
-               pagesizes[2] = L1_SIZE;
+               if (L1_BLOCKS_SUPPORTED) {
+                       KASSERT(MAXPAGESIZES > 2 && pagesizes[2] == 0,
+                           ("pmap_init: can't assign to pagesizes[2]"));
+                       pagesizes[2] = L1_SIZE;
+               }
        }
 
        /*
@@ -1483,6 +1501,7 @@ pmap_extract(pmap_t pmap, vm_offset_t va)
                pa = tpte & ~ATTR_MASK;
                switch(lvl) {
                case 1:
+                       PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
                        KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK,
                            ("pmap_extract: Invalid L1 pte found: %lx",
                            tpte & ATTR_DESCR_MASK));
@@ -1530,6 +1549,10 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, 
vm_prot_t prot)
 
                KASSERT(lvl > 0 && lvl <= 3,
                    ("pmap_extract_and_hold: Invalid level %d", lvl));
+               /*
+                * Check that the pte is either a L3 page, or a L1 or L2 block
+                * entry. We can assume L1_BLOCK == L2_BLOCK.
+                */
                KASSERT((lvl == 3 && (tpte & ATTR_DESCR_MASK) == L3_PAGE) ||
                    (lvl < 3 && (tpte & ATTR_DESCR_MASK) == L1_BLOCK),
                    ("pmap_extract_and_hold: Invalid pte at L%d: %lx", lvl,
@@ -2426,8 +2449,13 @@ pmap_growkernel(vm_offset_t addr)
  ***************************************************/
 
 CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
+#if PAGE_SIZE == PAGE_SIZE_4K
 CTASSERT(_NPCM == 3);
 CTASSERT(_NPCPV == 168);
+#else
+CTASSERT(_NPCM == 11);
+CTASSERT(_NPCPV == 677);
+#endif
 
 static __inline struct pv_chunk *
 pv_to_chunk(pv_entry_t pv)
@@ -2438,11 +2466,30 @@ pv_to_chunk(pv_entry_t pv)
 
 #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
 
-#define        PC_FREE0        0xfffffffffffffffful
-#define        PC_FREE1        0xfffffffffffffffful
-#define        PC_FREE2        0x000000fffffffffful
+#define        PC_FREEN        0xfffffffffffffffful
+#if _NPCM == 3
+#define        PC_FREEL        0x000000fffffffffful
+#elif _NPCM == 11
+#define        PC_FREEL        0x0000001ffffffffful
+#endif
+
+#if _NPCM == 3
+#define        PC_IS_FREE(pc)  ((pc)->pc_map[0] == PC_FREEN &&                 
\
+    (pc)->pc_map[1] == PC_FREEN && (pc)->pc_map[2] == PC_FREEL)
+#else
+#define        PC_IS_FREE(pc)                                                  
\
+    (memcmp((pc)->pc_map, pc_freemask, sizeof(pc_freemask)) == 0)
+#endif
 
-static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 };
+static const uint64_t pc_freemask[] = { PC_FREEN, PC_FREEN,
+#if _NPCM > 3
+    PC_FREEN, PC_FREEN, PC_FREEN, PC_FREEN, PC_FREEN, PC_FREEN, PC_FREEN,
+    PC_FREEN,
+#endif
+    PC_FREEL
+};
+
+CTASSERT(nitems(pc_freemask) == _NPCM);
 
 #ifdef PV_STATS
 static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
@@ -2608,8 +2655,7 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock 
**lockp)
                PV_STAT(atomic_add_int(&pv_entry_spare, freed));
                PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
                TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
-               if (pc->pc_map[0] == PC_FREE0 && pc->pc_map[1] == PC_FREE1 &&
-                   pc->pc_map[2] == PC_FREE2) {
+               if (PC_IS_FREE(pc)) {
                        PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
                        PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
                        PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
@@ -2678,8 +2724,7 @@ free_pv_entry(pmap_t pmap, pv_entry_t pv)
        field = idx / 64;
        bit = idx % 64;
        pc->pc_map[field] |= 1ul << bit;
-       if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 ||
-           pc->pc_map[2] != PC_FREE2) {
+       if (!PC_IS_FREE(pc)) {
                /* 98% of the time, pc is already at the head of the list. */
                if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) {
                        TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
@@ -2767,9 +2812,8 @@ retry:
        dump_add_page(m->phys_addr);
        pc = (void *)PHYS_TO_DMAP(m->phys_addr);
        pc->pc_pmap = pmap;
-       pc->pc_map[0] = PC_FREE0 & ~1ul;        /* preallocated bit 0 */
-       pc->pc_map[1] = PC_FREE1;
-       pc->pc_map[2] = PC_FREE2;
+       memcpy(pc->pc_map, pc_freemask, sizeof(pc_freemask));
+       pc->pc_map[0] &= ~1ul;          /* preallocated bit 0 */
        mtx_lock(&pv_chunks_mutex);
        TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
        mtx_unlock(&pv_chunks_mutex);
@@ -2829,9 +2873,7 @@ retry:
                dump_add_page(m->phys_addr);
                pc = (void *)PHYS_TO_DMAP(m->phys_addr);
                pc->pc_pmap = pmap;
-               pc->pc_map[0] = PC_FREE0;
-               pc->pc_map[1] = PC_FREE1;
-               pc->pc_map[2] = PC_FREE2;
+               memcpy(pc->pc_map, pc_freemask, sizeof(pc_freemask));
                TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
                TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru);
                PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV));
@@ -3265,6 +3307,7 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
                if (pmap_load(l1) == 0)
                        continue;
                if ((pmap_load(l1) & ATTR_DESCR_MASK) == L1_BLOCK) {
+                       PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
                        KASSERT(va_next <= eva,
                            ("partial update of non-transparent 1G page "
                            "l1 %#lx sva %#lx eva %#lx va_next %#lx",
@@ -3518,6 +3561,7 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t 
eva, vm_prot_t prot)
                if (pmap_load(l1) == 0)
                        continue;
                if ((pmap_load(l1) & ATTR_DESCR_MASK) == L1_BLOCK) {
+                       PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
                        KASSERT(va_next <= eva,
                            ("partial update of non-transparent 1G page "
                            "l1 %#lx sva %#lx eva %#lx va_next %#lx",
@@ -3848,9 +3892,10 @@ restart:
                                mp->ref_count++;
                        }
                }
-               KASSERT((origpte & ATTR_DESCR_VALID) == 0 ||
-                   ((origpte & ATTR_DESCR_MASK) == L1_BLOCK &&
-                    (origpte & ~ATTR_MASK) == (newpte & ~ATTR_MASK)),
+               KASSERT((origpte & ATTR_DESCR_VALID) == 0 ||
+                   (L1_BLOCKS_SUPPORTED &&
+                   (origpte & ATTR_DESCR_MASK) == L1_BLOCK &&
+                   (origpte & ~ATTR_MASK) == (newpte & ~ATTR_MASK)),
                    ("va %#lx changing 1G phys page l1 %#lx newpte %#lx",
                    va, origpte, newpte));
                pmap_store(l1p, newpte);
@@ -3980,9 +4025,10 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, 
vm_prot_t prot,
                KASSERT((m->oflags & VPO_UNMANAGED) != 0,
                    ("managed largepage va %#lx flags %#x", va, flags));
                new_l3 &= ~L3_PAGE;
-               if (psind == 2)
+               if (psind == 2) {
+                       PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
                        new_l3 |= L1_BLOCK;
-               else /* (psind == 1) */
+               } else /* (psind == 1) */
                        new_l3 |= L2_BLOCK;
                rv = pmap_enter_largepage(pmap, va, new_l3, flags, psind);
                goto out;
@@ -4660,6 +4706,7 @@ pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
                        continue;
 
                if ((pmap_load(l1) & ATTR_DESCR_MASK) == L1_BLOCK) {
+                       PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
                        KASSERT(va_next <= eva,
                            ("partial update of non-transparent 1G page "
                            "l1 %#lx sva %#lx eva %#lx va_next %#lx",
@@ -4772,6 +4819,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t 
dst_addr, vm_size_t len,
                if (pmap_load(l1) == 0)
                        continue;
                if ((pmap_load(l1) & ATTR_DESCR_MASK) == L1_BLOCK) {
+                       PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
                        KASSERT(va_next <= end_addr,
                            ("partial update of non-transparent 1G page "
                            "l1 %#lx addr %#lx end_addr %#lx va_next %#lx",
@@ -5730,6 +5778,7 @@ pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t 
eva, int advice)
                if (pmap_load(l1) == 0)
                        continue;
                if ((pmap_load(l1) & ATTR_DESCR_MASK) == L1_BLOCK) {
+                       PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
                        KASSERT(va_next <= eva,
                            ("partial update of non-transparent 1G page "
                            "l1 %#lx sva %#lx eva %#lx va_next %#lx",
@@ -6243,6 +6292,7 @@ pmap_change_props_locked(vm_offset_t va, vm_size_t size, 
vm_prot_t prot,
                        default:
                                panic("Invalid DMAP table level: %d\n", lvl);
                        case 1:
+                               PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
                                if ((tmpva & L1_OFFSET) == 0 &&
                                    (base + size - tmpva) >= L1_SIZE) {
                                        pte_size = L1_SIZE;
@@ -6318,6 +6368,7 @@ pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t 
va)
 
        PMAP_LOCK_ASSERT(pmap, MA_OWNED);
        oldl1 = pmap_load(l1);
+       PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
        KASSERT((oldl1 & ATTR_DESCR_MASK) == L1_BLOCK,
            ("pmap_demote_l1: Demoting a non-block entry"));
        KASSERT((va & L1_OFFSET) == 0,
@@ -7400,6 +7451,7 @@ sysctl_kmaps(SYSCTL_HANDLER_ARGS)
                                continue;
                        }
                        if ((l1e & ATTR_DESCR_MASK) == L1_BLOCK) {
+                               PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
                                sysctl_kmaps_check(sb, &range, sva, l0e, l1e,
                                    0, 0);
                                range.l1blocks++;
diff --git a/sys/arm64/include/pmap.h b/sys/arm64/include/pmap.h
index 8c7c26fce8a0..87527c390f57 100644
--- a/sys/arm64/include/pmap.h
+++ b/sys/arm64/include/pmap.h
@@ -106,8 +106,18 @@ typedef struct pv_entry {
  * pv_entries are allocated in chunks per-process.  This avoids the
  * need to track per-pmap assignments.
  */
+#if PAGE_SIZE == PAGE_SIZE_4K
 #define        _NPCM   3
 #define        _NPCPV  168
+#define        _NPAD   0
+#elif PAGE_SIZE == PAGE_SIZE_16K
+#define        _NPCM   11
+#define        _NPCPV  677
+#define        _NPAD   1
+#else
+#error Unsupported page size
+#endif
+
 #define        PV_CHUNK_HEADER                                                 
\
        pmap_t                  pc_pmap;                                \
        TAILQ_ENTRY(pv_chunk)   pc_list;                                \
@@ -121,6 +131,7 @@ struct pv_chunk_header {
 struct pv_chunk {
        PV_CHUNK_HEADER
        struct pv_entry         pc_pventry[_NPCPV];
+       uint64_t                pc_pad[_NPAD];
 };
 
 struct thread;
diff --git a/sys/arm64/include/pte.h b/sys/arm64/include/pte.h
index 3ce11133e2ef..eaf6745f9679 100644
--- a/sys/arm64/include/pte.h
+++ b/sys/arm64/include/pte.h
@@ -109,33 +109,43 @@ typedef   uint64_t        pt_entry_t;             /* page 
table entry */
 #define        ATTR_DESCR_TYPE_PAGE    2
 #define        ATTR_DESCR_TYPE_BLOCK   0
 
-/* Level 0 table, 512GiB per entry */
+#if PAGE_SIZE == PAGE_SIZE_4K
 #define        L0_SHIFT        39
-#define        L0_SIZE         (1ul << L0_SHIFT)
+#define        L1_SHIFT        30
+#define        L2_SHIFT        21
+#define        L3_SHIFT        12
+#elif PAGE_SIZE == PAGE_SIZE_16K
+#define        L0_SHIFT        47
+#define        L1_SHIFT        36
+#define        L2_SHIFT        25
+#define        L3_SHIFT        14
+#else
+#error Unsupported page size
+#endif
+
+/* Level 0 table, 512GiB/128TiB per entry */
+#define        L0_SIZE         (UINT64_C(1) << L0_SHIFT)
 #define        L0_OFFSET       (L0_SIZE - 1ul)
 #define        L0_INVAL        0x0 /* An invalid address */
        /* 0x1 Level 0 doesn't support block translation */
        /* 0x2 also marks an invalid address */
 #define        L0_TABLE        0x3 /* A next-level table */
 
-/* Level 1 table, 1GiB per entry */
-#define        L1_SHIFT        30
-#define        L1_SIZE         (1 << L1_SHIFT)
+/* Level 1 table, 1GiB/64GiB per entry */
+#define        L1_SIZE         (UINT64_C(1) << L1_SHIFT)
 #define        L1_OFFSET       (L1_SIZE - 1)
 #define        L1_INVAL        L0_INVAL
 #define        L1_BLOCK        0x1
 #define        L1_TABLE        L0_TABLE
 
-/* Level 2 table, 2MiB per entry */
-#define        L2_SHIFT        21
-#define        L2_SIZE         (1 << L2_SHIFT)
+/* Level 2 table, 2MiB/32MiB per entry */
+#define        L2_SIZE         (UINT64_C(1) << L2_SHIFT)
 #define        L2_OFFSET       (L2_SIZE - 1)
 #define        L2_INVAL        L1_INVAL
-#define        L2_BLOCK        L1_BLOCK
+#define        L2_BLOCK        0x1
 #define        L2_TABLE        L1_TABLE
 
-/* Level 3 table, 4KiB per entry */
-#define        L3_SHIFT        12
+/* Level 3 table, 4KiB/16KiB per entry */
 #define        L3_SIZE         (1 << L3_SHIFT)
 #define        L3_OFFSET       (L3_SIZE - 1)
 #define        L3_INVAL        0x0
@@ -145,11 +155,19 @@ typedef   uint64_t        pt_entry_t;             /* page 
table entry */
 
 #define        PMAP_MAPDEV_EARLY_SIZE  (L2_SIZE * 8)
 
+#if PAGE_SIZE == PAGE_SIZE_4K
 #define        L0_ENTRIES_SHIFT 9
+#define        Ln_ENTRIES_SHIFT 9
+#elif PAGE_SIZE == PAGE_SIZE_16K
+#define        L0_ENTRIES_SHIFT 1
+#define        Ln_ENTRIES_SHIFT 11
+#else
+#error Unsupported page size
+#endif
+
 #define        L0_ENTRIES      (1 << L0_ENTRIES_SHIFT)
 #define        L0_ADDR_MASK    (L0_ENTRIES - 1)
 
-#define        Ln_ENTRIES_SHIFT 9
 #define        Ln_ENTRIES      (1 << Ln_ENTRIES_SHIFT)
 #define        Ln_ADDR_MASK    (Ln_ENTRIES - 1)
 #define        Ln_TABLE_MASK   ((1 << 12) - 1)

Reply via email to