The branch main has been updated by andrew:

URL: https://cgit.FreeBSD.org/src/commit/?id=c15085278cb55bd3c1ea252adf5635bb6800b431

commit c15085278cb55bd3c1ea252adf5635bb6800b431
Author:     Andrew Turner <and...@freebsd.org>
AuthorDate: 2022-08-19 13:54:36 +0000
Commit:     Andrew Turner <and...@freebsd.org>
CommitDate: 2022-11-17 17:12:40 +0000

    arm64 pmap: implement per-superpage locks
    
    As with the amd64 pmap, introduce per-superpage locks backed by pages
    allocated from their respective NUMA domains.
    
    This significantly reduces lock contention in pmap when running
    poudriere on a 160-core Ampere Altra server.
    
    Sponsored by:   The FreeBSD Foundation
    Differential Revision: https://reviews.freebsd.org/D36310
---
 sys/arm64/arm64/pmap.c | 209 +++++++++++++++++++++++++++++++++----------------
 1 file changed, 141 insertions(+), 68 deletions(-)
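
For readers new to the scheme, here is a minimal userspace C sketch of the idea:
rather than hashing a physical address into a fixed pool of NPV_LIST_LOCKS
(MAXCPU) rwlocks, every 2 MB (L2) superpage gets its own lock entry, found by
direct indexing from the physical address. All names here (toy_pv_entry,
toy_pa_to_pvd) are hypothetical and the types are simplified; this is not the
kernel code in the diff below.

#include <assert.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define L2_SHIFT        21                      /* 2 MB (L2) superpages */
#define L2_SIZE         (1UL << L2_SHIFT)

struct toy_pv_entry {                           /* stand-in for pmap_large_md_page */
        pthread_rwlock_t lock;                  /* per-superpage PV-list lock */
        uint64_t         pv_state;              /* placeholder for PV-list state */
};

static struct toy_pv_entry *toy_table;          /* one entry per superpage */
static uint64_t toy_base, toy_end;              /* covered "physical" range */

/*
 * Map a "physical address" to its superpage's entry by direct indexing,
 * roughly what _pa_to_pmdp() does per vm_phys segment in the patch.
 */
static struct toy_pv_entry *
toy_pa_to_pvd(uint64_t pa)
{
        if (pa < toy_base || pa >= toy_end)
                return (NULL);
        return (&toy_table[(pa - toy_base) >> L2_SHIFT]);
}

int
main(void)
{
        uint64_t i, npg;

        toy_base = 0;
        toy_end = 64 * L2_SIZE;                 /* pretend 128 MB of memory */
        npg = (toy_end - toy_base) >> L2_SHIFT;
        toy_table = calloc(npg, sizeof(*toy_table));
        if (toy_table == NULL)
                return (1);
        for (i = 0; i < npg; i++)
                pthread_rwlock_init(&toy_table[i].lock, NULL);

        /* Pages in different superpages get different locks... */
        assert(toy_pa_to_pvd(0x1000) != toy_pa_to_pvd(3 * L2_SIZE + 0x1000));
        /* ...while pages in the same superpage share one. */
        assert(toy_pa_to_pvd(0x1000) == toy_pa_to_pvd(0x2000));
        printf("superpages covered: %ju\n", (uintmax_t)npg);
        return (0);
}

Backing each domain's slice of the table with pages allocated from that domain,
as pmap_init_pv_table() does below, additionally keeps the lock and PV-list head
local to the NUMA domain that owns the memory.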

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 4e101ace8356..7df16bc41922 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -188,8 +188,15 @@ __FBSDID("$FreeBSD$");
 #define        pmap_l1_pindex(v)       (NUL2E + ((v) >> L1_SHIFT))
 #define        pmap_l2_pindex(v)       ((v) >> L2_SHIFT)
 
-static struct md_page *
-pa_to_pvh(vm_paddr_t pa)
+struct pmap_large_md_page {
+       struct rwlock   pv_lock;
+       struct md_page  pv_page;
+       /* Pad to a power of 2, see pmap_init_pv_table(). */
+       int             pv_pad[2];
+};
+
+static struct pmap_large_md_page *
+_pa_to_pmdp(vm_paddr_t pa)
 {
        struct vm_phys_seg *seg;
        int segind;
@@ -197,26 +204,46 @@ pa_to_pvh(vm_paddr_t pa)
        for (segind = 0; segind < vm_phys_nsegs; segind++) {
                seg = &vm_phys_segs[segind];
                if (pa >= seg->start && pa < seg->end)
-                       return ((struct md_page *)seg->md_first +
+                       return ((struct pmap_large_md_page *)seg->md_first +
                            pmap_l2_pindex(pa) - pmap_l2_pindex(seg->start));
        }
-       panic("pa 0x%jx not within vm_phys_segs", (uintmax_t)pa);
+       return (NULL);
+}
+
+static struct pmap_large_md_page *
+pa_to_pmdp(vm_paddr_t pa)
+{
+       struct pmap_large_md_page *pvd;
+
+       pvd = _pa_to_pmdp(pa);
+       if (pvd == NULL)
+               panic("pa 0x%jx not within vm_phys_segs", (uintmax_t)pa);
+       return (pvd);
 }
 
-static struct md_page *
-page_to_pvh(vm_page_t m)
+static struct pmap_large_md_page *
+page_to_pmdp(vm_page_t m)
 {
        struct vm_phys_seg *seg;
 
        seg = &vm_phys_segs[m->segind];
-       return ((struct md_page *)seg->md_first +
+       return ((struct pmap_large_md_page *)seg->md_first +
            pmap_l2_pindex(VM_PAGE_TO_PHYS(m)) - pmap_l2_pindex(seg->start));
 }
 
-#define        NPV_LIST_LOCKS  MAXCPU
+#define        pa_to_pvh(pa)   (&(pa_to_pmdp(pa)->pv_page))
+#define        page_to_pvh(m)  (&(page_to_pmdp(m)->pv_page))
 
-#define        PHYS_TO_PV_LIST_LOCK(pa)        \
-                       (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS])
+#define        PHYS_TO_PV_LIST_LOCK(pa)        ({                      \
+       struct pmap_large_md_page *_pvd;                        \
+       struct rwlock *_lock;                                   \
+       _pvd = _pa_to_pmdp(pa);                                 \
+       if (__predict_false(_pvd == NULL))                      \
+               _lock = &pv_dummy_large.pv_lock;                \
+       else                                                    \
+               _lock = &(_pvd->pv_lock);                       \
+       _lock;                                                  \
+})
 
 #define        CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa)  do {    \
        struct rwlock **_lockp = (lockp);               \
@@ -304,9 +331,10 @@ struct pv_chunks_list {
 
 struct pv_chunks_list __exclusive_cache_line pv_chunks[PMAP_MEMDOM];
 
-static struct rwlock pv_list_locks[NPV_LIST_LOCKS];
-static struct md_page *pv_table;
-static struct md_page pv_dummy;
+__exclusive_cache_line static struct pmap_large_md_page pv_dummy_large;
+#define pv_dummy pv_dummy_large.pv_page
+__read_mostly static struct pmap_large_md_page *pv_table;
+__read_mostly vm_paddr_t pmap_last_pa;
 
 vm_paddr_t dmap_phys_base;     /* The start of the dmap region */
 vm_paddr_t dmap_phys_max;      /* The limit of the dmap region */
@@ -1311,6 +1339,104 @@ pmap_init_asids(struct asid_set *set, int bits)
        mtx_init(&set->asid_set_mutex, "asid set", NULL, MTX_SPIN);
 }
 
+static void
+pmap_init_pv_table(void)
+{
+       struct vm_phys_seg *seg, *next_seg;
+       struct pmap_large_md_page *pvd;
+       vm_size_t s;
+       long start, end, highest, pv_npg;
+       int domain, i, j, pages;
+
+       /*
+        * We strongly depend on the size being a power of two, so the assert
+        * is overzealous. However, should the struct be resized to a
+        * different power of two, the code below needs to be revisited.
+        */
+       CTASSERT((sizeof(*pvd) == 64));
+
+       /*
+        * Calculate the size of the array.
+        */
+       pv_npg = 0;
+       for (i = 0; i < vm_phys_nsegs; i++) {
+               seg = &vm_phys_segs[i];
+               pv_npg += pmap_l2_pindex(roundup2(seg->end, L2_SIZE)) -
+                   pmap_l2_pindex(seg->start);
+       }
+       s = (vm_size_t)pv_npg * sizeof(struct pmap_large_md_page);
+       s = round_page(s);
+       pv_table = (struct pmap_large_md_page *)kva_alloc(s);
+       if (pv_table == NULL)
+               panic("%s: kva_alloc failed\n", __func__);
+
+       /*
+        * Iterate physical segments to allocate domain-local memory for PV
+        * list headers.
+        */
+       highest = -1;
+       s = 0;
+       for (i = 0; i < vm_phys_nsegs; i++) {
+               seg = &vm_phys_segs[i];
+               start = highest + 1;
+               end = start + pmap_l2_pindex(roundup2(seg->end, L2_SIZE)) -
+                   pmap_l2_pindex(seg->start);
+               domain = seg->domain;
+
+               if (highest >= end)
+                       continue;
+
+               pvd = &pv_table[start];
+
+               pages = end - start + 1;
+               s = round_page(pages * sizeof(*pvd));
+               highest = start + (s / sizeof(*pvd)) - 1;
+
+               for (j = 0; j < s; j += PAGE_SIZE) {
+                       vm_page_t m = vm_page_alloc_noobj_domain(domain,
+                           VM_ALLOC_ZERO);
+                       if (m == NULL)
+                               panic("failed to allocate PV table page");
+                       pmap_qenter((vm_offset_t)pvd + j, &m, 1);
+               }
+
+               for (j = 0; j < s / sizeof(*pvd); j++) {
+                       rw_init_flags(&pvd->pv_lock, "pmap pv list", RW_NEW);
+                       TAILQ_INIT(&pvd->pv_page.pv_list);
+                       pvd++;
+               }
+       }
+       pvd = &pv_dummy_large;
+       memset(pvd, 0, sizeof(*pvd));
+       rw_init_flags(&pvd->pv_lock, "pmap pv list dummy", RW_NEW);
+       TAILQ_INIT(&pvd->pv_page.pv_list);
+
+       /*
+        * Set pointers from vm_phys_segs to pv_table.
+        */
+       for (i = 0, pvd = pv_table; i < vm_phys_nsegs; i++) {
+               seg = &vm_phys_segs[i];
+               seg->md_first = pvd;
+               pvd += pmap_l2_pindex(roundup2(seg->end, L2_SIZE)) -
+                   pmap_l2_pindex(seg->start);
+
+               /*
+                * If there is a following segment, and the final
+                * superpage of this segment and the initial superpage
+                * of the next segment are the same then adjust the
+                * pv_table entry for that next segment down by one so
+                * that the pv_table entries will be shared.
+                */
+               if (i + 1 < vm_phys_nsegs) {
+                       next_seg = &vm_phys_segs[i + 1];
+                       if (pmap_l2_pindex(roundup2(seg->end, L2_SIZE)) - 1 ==
+                           pmap_l2_pindex(next_seg->start)) {
+                               pvd--;
+                       }
+               }
+       }
+}
+
 /*
  *     Initialize the pmap module.
  *     Called by vm_init, to initialize any structures that the pmap
@@ -1319,11 +1445,8 @@ pmap_init_asids(struct asid_set *set, int bits)
 void
 pmap_init(void)
 {
-       struct vm_phys_seg *seg, *next_seg;
-       struct md_page *pvh;
-       vm_size_t s;
        uint64_t mmfr1;
-       int i, pv_npg, vmid_bits;
+       int i, vmid_bits;
 
        /*
         * Are large page mappings enabled?
@@ -1364,57 +1487,7 @@ pmap_init(void)
                    MTX_DEF);
                TAILQ_INIT(&pv_chunks[i].pvc_list);
        }
-
-       /*
-        * Initialize the pool of pv list locks.
-        */
-       for (i = 0; i < NPV_LIST_LOCKS; i++)
-               rw_init(&pv_list_locks[i], "pmap pv list");
-
-       /*
-        * Calculate the size of the pv head table for superpages.
-        */
-       pv_npg = 0;
-       for (i = 0; i < vm_phys_nsegs; i++) {
-               seg = &vm_phys_segs[i];
-               pv_npg += pmap_l2_pindex(roundup2(seg->end, L2_SIZE)) -
-                   pmap_l2_pindex(seg->start);
-       }
-
-       /*
-        * Allocate memory for the pv head table for superpages.
-        */
-       s = (vm_size_t)(pv_npg * sizeof(struct md_page));
-       s = round_page(s);
-       pv_table = kmem_malloc(s, M_WAITOK | M_ZERO);
-       for (i = 0; i < pv_npg; i++)
-               TAILQ_INIT(&pv_table[i].pv_list);
-       TAILQ_INIT(&pv_dummy.pv_list);
-
-       /*
-        * Set pointers from vm_phys_segs to pv_table.
-        */
-       for (i = 0, pvh = pv_table; i < vm_phys_nsegs; i++) {
-               seg = &vm_phys_segs[i];
-               seg->md_first = pvh;
-               pvh += pmap_l2_pindex(roundup2(seg->end, L2_SIZE)) -
-                   pmap_l2_pindex(seg->start);
-
-               /*
-                * If there is a following segment, and the final
-                * superpage of this segment and the initial superpage
-                * of the next segment are the same then adjust the
-                * pv_table entry for that next segment down by one so
-                * that the pv_table entries will be shared.
-                */
-               if (i + 1 < vm_phys_nsegs) {
-                       next_seg = &vm_phys_segs[i + 1];
-                       if (pmap_l2_pindex(roundup2(seg->end, L2_SIZE)) - 1 ==
-                           pmap_l2_pindex(next_seg->start)) {
-                               pvh--;
-                       }
-               }
-       }
+       pmap_init_pv_table();
 
        vm_initialized = 1;
 }
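
One subtle step above is the boundary adjustment at the end of
pmap_init_pv_table(): when the last L2 superpage of one vm_phys segment is the
same superpage as the first one of the next segment, the next segment's
md_first is backed down by one entry so both segments share a single pv_table
slot. A small standalone check with made-up addresses (not taken from any real
memory map) shows the comparison the code performs:

#include <stdint.h>
#include <stdio.h>

#define L2_SHIFT        21
#define L2_SIZE         (1UL << L2_SHIFT)
#define roundup2(x, y)  (((x) + ((y) - 1)) & ~((uint64_t)(y) - 1))
#define l2_pindex(pa)   ((pa) >> L2_SHIFT)

int
main(void)
{
        uint64_t seg0_end = 0x40100000UL;       /* first segment ends mid-superpage */
        uint64_t seg1_start = 0x401f0000UL;     /* next segment starts in that same superpage */

        /* pmap_init_pv_table() compares these two pindexes... */
        uint64_t last = l2_pindex(roundup2(seg0_end, L2_SIZE)) - 1;
        uint64_t first = l2_pindex(seg1_start);

        /*
         * ...and when they match, the next segment's md_first is moved back
         * one entry so both segments share that superpage's pv_table slot.
         */
        printf("last pindex of seg0: %#jx, first pindex of seg1: %#jx, shared: %s\n",
            (uintmax_t)last, (uintmax_t)first, last == first ? "yes" : "no");
        return (0);
}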
