In message <201910070406.x9746n0u009...@slippy.cwsent.com>, Cy Schubert writes:
> In message <201910062213.x96mdzv3085...@repo.freebsd.org>, Mateusz Guzik writes:
> > Author: mjg
> > Date: Sun Oct  6 22:13:35 2019
> > New Revision: 353149
> > URL: https://svnweb.freebsd.org/changeset/base/353149
> >
> > Log:
> >   amd64 pmap: implement per-superpage locks
> >   
> >   The current 256-lock sized array is a problem in the following ways:
> >   - it's way too small
> >   - there are 2 locks per cacheline
> >   - it is not NUMA-aware
> >   
> >   Solve these issues by introducing per-superpage locks backed by pages
> >   allocated from respective domains.
> >   
> >   This significantly reduces contention e.g. during poudriere -j 104.
> >   See the review for results.
> >   
> >   Reviewed by:      kib
> >   Discussed with:   jeff
> >   Sponsored by:     The FreeBSD Foundation
> >   Differential Revision:    https://reviews.freebsd.org/D21833
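
For context, the replaced scheme hashed every 2MB range into a fixed pool of
NPV_LIST_LOCKS (= MAXCPU, the 256 locks mentioned above), so distant ranges
collided on one lock and, with 32-byte locks, adjacent slots shared a 64-byte
cacheline. A minimal userland sketch of both effects, assuming PDRSHIFT is 21
and sizeof(struct rwlock) is 32 on amd64 (stock values, not taken from this
commit):

    /*
     * Sketch of the old hashed pv-list locking: two physical addresses
     * NPV_LIST_LOCKS superpages apart collide on the same slot, and
     * two adjacent slots share one cacheline.
     */
    #include <stdio.h>
    #include <stdint.h>

    #define PDRSHIFT        21      /* 2MB superpages */
    #define NPV_LIST_LOCKS  256     /* MAXCPU */
    #define LOCK_SIZE       32      /* assumed sizeof(struct rwlock) */
    #define CACHE_LINE      64

    int
    main(void)
    {
            uint64_t pa1 = 0x40200000;
            uint64_t pa2 = pa1 + ((uint64_t)NPV_LIST_LOCKS << PDRSHIFT);

            printf("slot(pa1) = %ju, slot(pa2) = %ju (collision)\n",
                (uintmax_t)((pa1 >> PDRSHIFT) % NPV_LIST_LOCKS),
                (uintmax_t)((pa2 >> PDRSHIFT) % NPV_LIST_LOCKS));
            printf("locks per cacheline: %d\n", CACHE_LINE / LOCK_SIZE);
            return (0);
    }

The patch instead gives every superpage its own cacheline-sized entry,
allocated from that page's NUMA domain.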
> >
> > Modified:
> >   head/sys/amd64/amd64/pmap.c
> >
> > Modified: head/sys/amd64/amd64/pmap.c
> > ==============================================================================
> > --- head/sys/amd64/amd64/pmap.c     Sun Oct  6 20:36:25 2019        (r353148)
> > +++ head/sys/amd64/amd64/pmap.c     Sun Oct  6 22:13:35 2019        (r353149)
> > @@ -316,13 +316,25 @@ pmap_pku_mask_bit(pmap_t pmap)
> >  #define PV_STAT(x) do { } while (0)
> >  #endif
> >  
> > -#define    pa_index(pa)    ((pa) >> PDRSHIFT)
> > +#undef pa_index
> > +#define    pa_index(pa)    ({                                      \
> > +   KASSERT((pa) <= vm_phys_segs[vm_phys_nsegs - 1].end,    \
> > +       ("address %lx beyond the last segment", (pa)));     \
> > +   (pa) >> PDRSHIFT;                                       \
> > +})
> > +#if VM_NRESERVLEVEL > 0
> > +#define    pa_to_pmdp(pa)  (&pv_table[pa_index(pa)])
> > +#define    pa_to_pvh(pa)   (&(pa_to_pmdp(pa)->pv_page))
> > +#define    PHYS_TO_PV_LIST_LOCK(pa)        \
> > +                   (&(pa_to_pmdp(pa)->pv_lock))
> > +#else
> >  #define    pa_to_pvh(pa)   (&pv_table[pa_index(pa)])
> >  
> >  #define    NPV_LIST_LOCKS  MAXCPU
> >  
> >  #define    PHYS_TO_PV_LIST_LOCK(pa)        \
> >                     (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS])
> > +#endif
> >  
> >  #define    CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa)  do {    \
> >     struct rwlock **_lockp = (lockp);               \
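
The new pa_index() is a GCC/Clang statement expression: it evaluates the
bounds KASSERT and then yields the shifted value, so every caller inherits
the check. A userland rehearsal of the same pattern, with assert() standing
in for KASSERT and a made-up segment end:

    #include <assert.h>
    #include <stdio.h>
    #include <stdint.h>

    #define PDRSHIFT 21

    /* Stand-in for vm_phys_segs[vm_phys_nsegs - 1].end. */
    static uint64_t phys_end = 1ULL << 34;

    #define pa_index(pa) ({                                     \
            assert((pa) <= phys_end);  /* KASSERT upstream */   \
            (pa) >> PDRSHIFT;                                   \
    })

    int
    main(void)
    {
            printf("pa_index(1GB) = %ju\n",
                (uintmax_t)pa_index(1ULL << 30));
            return (0);
    }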
> > @@ -400,14 +412,22 @@ static int pmap_initialized;
> >  
> >  /*
> >   * Data for the pv entry allocation mechanism.
> > - * Updates to pv_invl_gen are protected by the pv_list_locks[]
> > - * elements, but reads are not.
> > + * Updates to pv_invl_gen are protected by the pv list lock but reads are not.
> >   */
> >  static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
> >  static struct mtx __exclusive_cache_line pv_chunks_mutex;
> > +#if VM_NRESERVLEVEL > 0
> > +struct pmap_large_md_page {
> > +   struct rwlock   pv_lock;
> > +   struct md_page  pv_page;
> > +   u_long pv_invl_gen;
> > +};
> > +static struct pmap_large_md_page *pv_table;
> > +#else
> >  static struct rwlock __exclusive_cache_line pv_list_locks[NPV_LIST_LOCKS];
> >  static u_long pv_invl_gen[NPV_LIST_LOCKS];
> >  static struct md_page *pv_table;
> > +#endif
> >  static struct md_page pv_dummy;
> >  
> >  /*
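
A CTASSERT later in the patch pins pmap_large_md_page at exactly 64 bytes,
i.e. one lock + list head + invalidation generation per cacheline. The
arithmetic, using stand-in types with assumed stock amd64 sizes (the real
definitions live in the rwlock and pmap headers):

    #include <stdint.h>
    #include <sys/queue.h>

    struct rwlock_standin {                 /* 32 bytes */
            void            *lo_name;
            uint32_t        lo_flags, lo_data;
            void            *lo_witness;
            volatile uintptr_t rw_lock;
    };

    struct md_page_standin {                /* 24 bytes */
            TAILQ_HEAD(, pv_entry) pv_list; /* two pointers */
            int             pv_gen;
            int             pat_mode;
    };

    struct pmap_large_md_page_standin {
            struct rwlock_standin   pv_lock;     /* 32 */
            struct md_page_standin  pv_page;     /* 24 */
            uint64_t                pv_invl_gen; /*  8 */
    };

    _Static_assert(sizeof(struct pmap_large_md_page_standin) == 64,
        "one pv_table entry per cacheline");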
> > @@ -918,12 +938,21 @@ SYSCTL_LONG(_vm_pmap, OID_AUTO, invl_wait_slow, CTLFLA
> >      "Number of slow invalidation waits for lockless DI");
> >  #endif
> >  
> > +#if VM_NRESERVLEVEL > 0
> >  static u_long *
> >  pmap_delayed_invl_genp(vm_page_t m)
> >  {
> >  
> > +   return (&pa_to_pmdp(VM_PAGE_TO_PHYS(m))->pv_invl_gen);
> > +}
> > +#else
> > +static u_long *
> > +pmap_delayed_invl_genp(vm_page_t m)
> > +{
> > +
> >     return (&pv_invl_gen[pa_index(VM_PAGE_TO_PHYS(m)) % NPV_LIST_LOCKS]);
> >  }
> > +#endif
> >  
> >  static void
> >  pmap_delayed_invl_callout_func(void *arg __unused)
> > @@ -1803,6 +1832,112 @@ pmap_page_init(vm_page_t m)
> >     m->md.pat_mode = PAT_WRITE_BACK;
> >  }
> >  
> > +#if VM_NRESERVLEVEL > 0
> > +static void
> > +pmap_init_pv_table(void)
> > +{
> > +   struct pmap_large_md_page *pvd;
> > +   vm_size_t s;
> > +   long start, end, highest, pv_npg;
> > +   int domain, i, j, pages;
> > +
> > +   /*
> > +    * We strongly depend on the size being a power of two, so the assert
> > +    * is overzealous. However, should the struct be resized to a
> > +    * different power of two, the code below needs to be revisited.
> > +    */
> > +   CTASSERT((sizeof(*pvd) == 64));
> > +
> > +   /*
> > +    * Calculate the size of the array.
> > +    */
> > +   pv_npg = howmany(vm_phys_segs[vm_phys_nsegs - 1].end, NBPDR);
> > +   s = (vm_size_t)pv_npg * sizeof(struct pmap_large_md_page);
> > +   s = round_page(s);
> > +   pv_table = (struct pmap_large_md_page *)kva_alloc(s);
> > +   if (pv_table == NULL)
> > +           panic("%s: kva_alloc failed\n", __func__);
> > +
> > +   /*
> > +    * Iterate physical segments to allocate space for respective pages.
> > +    */
> > +   highest = -1;
> > +   s = 0;
> > +   for (i = 0; i < vm_phys_nsegs; i++) {
> > +           start = vm_phys_segs[i].start / NBPDR;
> > +           end = vm_phys_segs[i].end / NBPDR;
> > +           domain = vm_phys_segs[i].domain;
> > +
> > +           if (highest >= end)
> > +                   continue;
> > +
> > +           if (start < highest) {
> > +                   start = highest + 1;
> > +                   pvd = &pv_table[start];
> > +           } else {
> > +                   /*
> > +                    * The lowest address may land somewhere in the middle
> > +                    * of our page. Simplify the code by pretending it is
> > +                    * at the beginning.
> > +                    */
> > +                   pvd = pa_to_pmdp(vm_phys_segs[i].start);
> > +                   pvd = (struct pmap_large_md_page *)trunc_page(pvd);
> > +                   start = pvd - pv_table;
> > +           }
> > +
> > +           pages = end - start + 1;
> > +           s = round_page(pages * sizeof(*pvd));
> > +           highest = start + (s / sizeof(*pvd)) - 1;
> > +
> > +           for (j = 0; j < s; j += PAGE_SIZE) {
> > +                   vm_page_t m = vm_page_alloc_domain(NULL, 0,
> > +                       domain, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ);
> > +                   if (m == NULL)
> > +                           panic("vm_page_alloc_domain failed for %lx\n", (vm_offset_t)pvd + j);
> > +                   pmap_qenter((vm_offset_t)pvd + j, &m, 1);
> > +           }
> > +
> > +           for (j = 0; j < s / sizeof(*pvd); j++) {
> > +                   rw_init_flags(&pvd->pv_lock, "pmap pv list", RW_NEW);
> > +                   TAILQ_INIT(&pvd->pv_page.pv_list);
> > +                   pvd->pv_page.pv_gen = 0;
> > +                   pvd->pv_page.pat_mode = 0;
> > +                   pvd->pv_invl_gen = 0;
> > +                   pvd++;
> > +           }
> > +   }
> > +   TAILQ_INIT(&pv_dummy.pv_list);
> > +}
> > +#else
> > +static void
> > +pmap_init_pv_table(void)
> > +{
> > +   vm_size_t s;
> > +   long i, pv_npg;
> > +
> > +   /*
> > +    * Initialize the pool of pv list locks.
> > +    */
> > +   for (i = 0; i < NPV_LIST_LOCKS; i++)
> > +           rw_init(&pv_list_locks[i], "pmap pv list");
> > +
> > +   /*
> > +    * Calculate the size of the pv head table for superpages.
> > +    */
> > +   pv_npg = howmany(vm_phys_segs[vm_phys_nsegs - 1].end, NBPDR);
> > +
> > +   /*
> > +    * Allocate memory for the pv head table for superpages.
> > +    */
> > +   s = (vm_size_t)pv_npg * sizeof(struct md_page);
> > +   s = round_page(s);
> > +   pv_table = (struct md_page *)kmem_malloc(s, M_WAITOK | M_ZERO);
> > +   for (i = 0; i < pv_npg; i++)
> > +           TAILQ_INIT(&pv_table[i].pv_list);
> > +   TAILQ_INIT(&pv_dummy.pv_list);
> > +}
> > +#endif
> > +
> >  /*
> >   * Initialize the pmap module.
> >   * Called by vm_init, to initialize any structures that the pmap
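
The subtle part of pmap_init_pv_table() above is the `highest' cursor:
backing pages are allocated whole (PAGE_SIZE / 64 = 64 entries each), so a
segment's last page can spill past its end and already cover the start of
the next segment, which must then be skipped or trimmed. A userland
rehearsal of just that bookkeeping, with made-up segment boundaries in 2MB
units (the kernel additionally trims `start' down to a page boundary, which
is elided here):

    #include <stdio.h>

    #define ENTRIES_PER_PAGE (4096 / 64)

    struct seg { long start, end; };    /* superpage indices */

    int
    main(void)
    {
            /* Made up; the second segment overlaps the first's spill. */
            struct seg segs[] = { { 0, 100 }, { 90, 300 }, { 310, 400 } };
            long start, end, entries, rounded, highest = -1;
            int i;

            for (i = 0; i < 3; i++) {
                    start = segs[i].start;
                    end = segs[i].end;
                    if (highest >= end)     /* fully covered already */
                            continue;
                    if (start < highest)    /* partially covered */
                            start = highest + 1;
                    /* Round up to whole pages, like round_page(). */
                    entries = end - start + 1;
                    rounded = (entries + ENTRIES_PER_PAGE - 1) /
                        ENTRIES_PER_PAGE * ENTRIES_PER_PAGE;
                    highest = start + rounded - 1;
                    printf("seg %d: entries %ld..%ld backed\n",
                        i, start, highest);
            }
            return (0);
    }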
> > @@ -1813,8 +1948,7 @@ pmap_init(void)
> >  {
> >     struct pmap_preinit_mapping *ppim;
> >     vm_page_t m, mpte;
> > -   vm_size_t s;
> > -   int error, i, pv_npg, ret, skz63;
> > +   int error, i, ret, skz63;
> >  
> >     /* L1TF, reserve page @0 unconditionally */
> >     vm_page_blacklist_add(0, bootverbose);
> > @@ -1902,26 +2036,7 @@ pmap_init(void)
> >      */
> >     mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF);
> >  
> > -   /*
> > -    * Initialize the pool of pv list locks.
> > -    */
> > -   for (i = 0; i < NPV_LIST_LOCKS; i++)
> > -           rw_init(&pv_list_locks[i], "pmap pv list");
> > -
> > -   /*
> > -    * Calculate the size of the pv head table for superpages.
> > -    */
> > -   pv_npg = howmany(vm_phys_segs[vm_phys_nsegs - 1].end, NBPDR);
> > -
> > -   /*
> > -    * Allocate memory for the pv head table for superpages.
> > -    */
> > -   s = (vm_size_t)(pv_npg * sizeof(struct md_page));
> > -   s = round_page(s);
> > -   pv_table = (struct md_page *)kmem_malloc(s, M_WAITOK | M_ZERO);
> > -   for (i = 0; i < pv_npg; i++)
> > -           TAILQ_INIT(&pv_table[i].pv_list);
> > -   TAILQ_INIT(&pv_dummy.pv_list);
> > +   pmap_init_pv_table();
> >  
> >     pmap_initialized = 1;
> >     for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) {
> >
>
> This causes a page fault during X (xdm) startup, which loads 
> drm-current-kmod.
>
> db_trace_self_wrapper() at db_trace_self_wrapper+0x2b/frame 0xfffffe0093e9c260
> vpanic() at vpanic+0x19d/frame 0xfffffe0093e9c2b0
> panic() at panic+0x43/frame 0xfffffe0093e9c310
> vm_fault() at vm_fault+0x2126/frame 0xfffffe0093e9c460
> vm_fault_trap() at vm_fault_trap+0x73/frame 0xfffffe0093e9c4b0
> trap_pfault() at trap_pfault+0x1b6/frame 0xfffffe0093e9c510
> trap() at trap+0x2a1/frame 0xfffffe0093e9c620
> calltrap() at calltrap+0x8/frame 0xfffffe0093e9c620
> --- trap 0xc, rip = 0xffffffff80a054b1, rsp = 0xfffffe0093e9c6f0, rbp = 0xfffffe0093e9c7a0 ---
> pmap_enter() at pmap_enter+0x861/frame 0xfffffe0093e9c7a0
> vm_fault() at vm_fault+0x1c69/frame 0xfffffe0093e9c8f0
> vm_fault_trap() at vm_fault_trap+0x73/frame 0xfffffe0093e9c940
> trap_pfault() at trap_pfault+0x1b6/frame 0xfffffe0093e9c9a0
> trap() at trap+0x438/frame 0xfffffe0093e9cab0
> calltrap() at calltrap+0x8/frame 0xfffffe0093e9cab0
> --- trap 0xc, rip = 0x30e2a9c3, rsp = 0x7fffffffea50, rbp = 0x7fffffffeaa0 ---
> Uptime: 3m33s
> Dumping 945 out of 7974 MB:..2%..11%..21%..31%..41%..51%..61%..72%..82%..92%
>
> (kgdb) bt
> #0  doadump (textdump=1) at pcpu_aux.h:55
> #1  0xffffffff8068c5ed in kern_reboot (howto=260)
>     at /opt/src/svn-current/sys/kern/kern_shutdown.c:479
> #2  0xffffffff8068caa9 in vpanic (fmt=<value optimized out>, 
>     ap=<value optimized out>)
>     at /opt/src/svn-current/sys/kern/kern_shutdown.c:908
> #3  0xffffffff8068c8a3 in panic (fmt=<value optimized out>)
>     at /opt/src/svn-current/sys/kern/kern_shutdown.c:835
> #4  0xffffffff8098c966 in vm_fault (map=<value optimized out>, 
>     vaddr=<value optimized out>, fault_type=<value optimized out>, 
>     fault_flags=<value optimized out>, m_hold=<value optimized out>)
>     at /opt/src/svn-current/sys/vm/vm_fault.c:672
> #5  0xffffffff8098a723 in vm_fault_trap (map=0xfffff80002001000, 
>     vaddr=<value optimized out>, fault_type=2 '\002', 
>     fault_flags=<value optimized out>, signo=0x0, ucode=0x0)
>     at /opt/src/svn-current/sys/vm/vm_fault.c:568
> #6  0xffffffff80a18326 in trap_pfault (frame=0xfffffe0093e9c630, 
>     signo=<value optimized out>, ucode=<value optimized out>)
>     at /opt/src/svn-current/sys/amd64/amd64/trap.c:828
> #7  0xffffffff80a177f1 in trap (frame=0xfffffe0093e9c630)
>     at /opt/src/svn-current/sys/amd64/amd64/trap.c:407
> #8  0xffffffff809f1aac in calltrap ()
>     at /opt/src/svn-current/sys/amd64/amd64/exception.S:289
> ---Type <return> to continue, or q <return> to quit---
> #9  0xffffffff80a054b1 in pmap_enter (pmap=<value optimized out>, 
>     va=851443712, m=0xfffffe0005b25ce8, prot=<value optimized out>, 
>     flags=2677542912, psind=<value optimized out>) at atomic.h:221
> #10 0xffffffff8098c4a9 in vm_fault (map=<value optimized out>, 
>     vaddr=<value optimized out>, fault_type=232 '\ufffd', 
>     fault_flags=<value optimized out>, m_hold=0x0)
>     at /opt/src/svn-current/sys/vm/vm_fault.c:489
> #11 0xffffffff8098a723 in vm_fault_trap (map=0xfffff80173eb5000, 
>     vaddr=<value optimized out>, fault_type=2 '\002', 
>     fault_flags=<value optimized out>, signo=0xfffffe0093e9ca84, 
>     ucode=0xfffffe0093e9ca80) at /opt/src/svn-current/sys/vm/vm_fault.c:568
> #12 0xffffffff80a18326 in trap_pfault (frame=0xfffffe0093e9cac0, 
>     signo=<value optimized out>, ucode=<value optimized out>)
>     at /opt/src/svn-current/sys/amd64/amd64/trap.c:828
> #13 0xffffffff80a17988 in trap (frame=0xfffffe0093e9cac0)
>     at /opt/src/svn-current/sys/amd64/amd64/trap.c:347
> #14 0xffffffff809f1aac in calltrap ()
>     at /opt/src/svn-current/sys/amd64/amd64/exception.S:289
> #15 0x0000000030e2a9c3 in ?? ()
> Previous frame inner to this frame (corrupt stack?)
> Current language:  auto; currently minimal
> (kgdb) frame 9
> #9  0xffffffff80a054b1 in pmap_enter (pmap=<value optimized out>, 
>     va=851443712, m=0xfffffe0005b25ce8, prot=<value optimized out>, 
>     flags=2677542912, psind=<value optimized out>) at atomic.h:221
> 221   ATOMIC_CMPSET(long);
> (kgdb) l
> 216   }
> 217   
> 218   ATOMIC_CMPSET(char);
> 219   ATOMIC_CMPSET(short);
> 220   ATOMIC_CMPSET(int);
> 221   ATOMIC_CMPSET(long);
> 222   
> 223   /*
> 224    * Atomically add the value of v to the integer pointed to by p and return
> 225    * the previous value of *p.
> (kgdb) 

Using kgdb from ports instead of the /usr/libexec version, I get a similar
result.

<4>WARN_ON(!mutex_is_locked(&fbc->lock))WARN_ON(!mutex_is_locked(&fbc->lock))
panic: vm_fault: fault on nofault entry, addr: 0xfffffe000e01c000
cpuid = 1
time = 1570417211
KDB: stack backtrace:
db_trace_self_wrapper() at db_trace_self_wrapper+0x2b/frame 0xfffffe0093e9c260
vpanic() at vpanic+0x19d/frame 0xfffffe0093e9c2b0
panic() at panic+0x43/frame 0xfffffe0093e9c310
vm_fault() at vm_fault+0x2126/frame 0xfffffe0093e9c460
vm_fault_trap() at vm_fault_trap+0x73/frame 0xfffffe0093e9c4b0
trap_pfault() at trap_pfault+0x1b6/frame 0xfffffe0093e9c510
trap() at trap+0x2a1/frame 0xfffffe0093e9c620
calltrap() at calltrap+0x8/frame 0xfffffe0093e9c620
--- trap 0xc, rip = 0xffffffff80a054b1, rsp = 0xfffffe0093e9c6f0, rbp = 0xfffffe0093e9c7a0 ---
pmap_enter() at pmap_enter+0x861/frame 0xfffffe0093e9c7a0
vm_fault() at vm_fault+0x1c69/frame 0xfffffe0093e9c8f0
vm_fault_trap() at vm_fault_trap+0x73/frame 0xfffffe0093e9c940
trap_pfault() at trap_pfault+0x1b6/frame 0xfffffe0093e9c9a0
trap() at trap+0x438/frame 0xfffffe0093e9cab0
calltrap() at calltrap+0x8/frame 0xfffffe0093e9cab0
--- trap 0xc, rip = 0x30e2a9c3, rsp = 0x7fffffffea50, rbp = 0x7fffffffeaa0 ---
Uptime: 3m33s
Dumping 945 out of 7974 MB:..2%..11%..21%..31%..41%..51%..61%..72%..82%..92%

__curthread () at /opt/src/svn-current/sys/amd64/include/pcpu_aux.h:55
55              __asm("movq %%gs:%P1,%0" : "=r" (td) : "n" (offsetof(struct pcpu,
(kgdb) 

Backtrace stopped: Cannot access memory at address 0x7fffffffea50
(kgdb) frame 10
#10 0xffffffff80a054b1 in atomic_fcmpset_long (dst=<optimized out>, 
    src=<optimized out>, expect=<optimized out>)
    at /opt/src/svn-current/sys/amd64/include/atomic.h:221
221     ATOMIC_CMPSET(long);
(kgdb) l
216     }
217     
218     ATOMIC_CMPSET(char);
219     ATOMIC_CMPSET(short);
220     ATOMIC_CMPSET(int);
221     ATOMIC_CMPSET(long);
222     
223     /*
224      * Atomically add the value of v to the integer pointed to by p and return
225      * the previous value of *p.
(kgdb) 
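
Given the "fault on nofault entry" address above, one way to narrow this
down (a sketch of the arithmetic only, not a verified diagnosis) would be to
check whether the faulting KVA lies inside the new pv_table and, if so,
which 2MB physical range its entry describes; a frame falling in a hole
between vm_phys_segs would point at a part of the table the init loop never
backed. The base address below is made up; in kgdb one would substitute the
actual value of pv_table:

    #include <stdio.h>
    #include <stdint.h>

    #define PDRSHIFT   21
    #define ENTRY_SIZE 64   /* sizeof(struct pmap_large_md_page) */

    int
    main(void)
    {
            uint64_t pv_table = 0xfffffe000e000000ULL; /* made up */
            uint64_t fault    = 0xfffffe000e01c000ULL; /* from the panic */
            uint64_t idx = (fault - pv_table) / ENTRY_SIZE;

            printf("entry %ju covers pa %#jx-%#jx\n", (uintmax_t)idx,
                (uintmax_t)(idx << PDRSHIFT),
                (uintmax_t)(((idx + 1) << PDRSHIFT) - 1));
            return (0);
    }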



-- 
Cheers,
Cy Schubert <cy.schub...@cschubert.com>
FreeBSD UNIX:  <c...@freebsd.org>   Web:  http://www.FreeBSD.org

        The need of the many outweighs the greed of the few.

