Author: marcel
Date: Thu Jun 30 20:34:55 2011
New Revision: 223700
URL: http://svn.freebsd.org/changeset/base/223700

Log:
  Change the management of nested faults by switching to physical
  addressing while reading or writing the trap frame. It's not
  possible to guarantee that the one translation cache entry that
  we depend on is not going to get purged by the CPU. We already
  know that global shootdowns (ptc.g and/or ptc.ga) can (and will)
  cause multiple TC entries to get purged and we initially tried
  to handle that by serializing kernel entry with these operations.
  However, we need to serialize kernel exit as well.
  
  But even if we can serialize, it appears that CPU threads within
  a core can affect each other's TC entries beyond the global
  shootdown. This would mean serializing any and all translation
  cache updates with the threads in a core with the kernel entry
  and exit of any thread in that core. This is just too painful
  and complicated.
  
  Since we already properly coded for the 2 nested faults that we
  can get, all we need to do is use those to obtain the physical
  address of the trap frame, switch to physical mode and in that
  way eliminate any further faults. The trap frame is already
  aligned to 1KB boundaries to make sure we don't cross the page
  boundary, so this is safe to do.
  
  We still need to serialize ptc.g or ptc.ga across CPUs because
  the platform can only have 1 such operation outstanding at the
  same time. We can now use a regular (spin) lock for this.
  
  Also, it has been observed that we can get nested TLB faults
  for region 7 virtual addresses. This was unexpected. For now,
  we enhance the nested TLB fault handler to deal with those as
  well, but it needs to be understood.

Modified:
  head/sys/ia64/ia64/exception.S
  head/sys/ia64/ia64/pmap.c

Modified: head/sys/ia64/ia64/exception.S
==============================================================================
--- head/sys/ia64/ia64/exception.S      Thu Jun 30 19:23:17 2011        
(r223699)
+++ head/sys/ia64/ia64/exception.S      Thu Jun 30 20:34:55 2011        
(r223700)
@@ -50,9 +50,6 @@ __FBSDID("$FreeBSD$");
 
        .section .ivt.data, "aw"
 
-       .global pmap_ptc_g_sem
-pmap_ptc_g_sem:        data8   0
-
        .global ia64_kptdir
 ia64_kptdir:   data8   0
 
@@ -151,58 +148,51 @@ ENTRY_NOPROFILE(exception_save, 0)
 }
 {      .mmi
        mov             ar.rsc=0
-       sub             r19=r23,r30
-       add             r31=8,r30
-       ;;
-}
-{      .mmi
        mov             r22=cr.iip
-       nop             0
        addl            r29=NTLBRT_SAVE,r0      // 22-bit restart token.
        ;;
 }
 
        /*
-        * We have a 1KB aligned trapframe, pointed to by sp. If we write
-        * to the trapframe, we may trigger a data nested TLB fault. By
-        * aligning the trapframe on a 1KB boundary, we guarantee that if
-        * we get a data nested TLB fault, it will be on the very first
-        * write. Since the data nested TLB fault does not preserve any
-        * state, we have to be careful what we clobber. Consequently, we
-        * have to be careful what we use here. Below a list of registers
-        * that are currently alive:
+        * We have a 1KB aligned trapframe, pointed to by r30. We can't
+        * reliably write to the trapframe using virtual addressing, due
+        * to the fact that TC entries we depend on can be removed by:
+        * 1.  ptc.g instructions issued by other threads/cores/CPUs, or
+        * 2.  TC modifications in another thread on the same core.
+        * When our TC entry gets removed, we get nested TLB faults and
+        * since no state is saved, we can only deal with those when
+        * explicitly coded and expected.
+        * As such, we switch to physical addressing and account for the
+        * fact that the tpa instruction can cause a nested TLB fault.
+        * Since the data nested TLB fault does not preserve any state,
+        * we have to be careful what we clobber. Consequently, we have
+        * to be careful what we use here. Below a list of registers that
+        * are considered alive:
         *      r16,r17=arguments
         *      r18=pr, r19=length, r20=unat, r21=rsc, r22=iip, r23=TOS
-        *      r29=restart point
-        *      r30,r31=trapframe pointers
+        *      r29=restart token
+        *      r30=trapframe pointers
         *      p14,p15=memory stack switch
         */
-
-       /* PTC.G enter non-exclusive */
-       mov     r24 = ar.ccv
-       movl    r25 = pmap_ptc_g_sem
-       ;;
-.ptc_g_0:
-       ld8.acq r26 = [r25]
-       ;;
-       tbit.nz p12, p0 = r26, 63
-(p12)  br.cond.spnt.few .ptc_g_0
-       ;;
-       mov     ar.ccv = r26
-       adds    r27 = 1, r26
+exception_save_restart:
+       tpa             r24=r30                 // Nested TLB fault possible
+       sub             r19=r23,r30
+       nop             0
        ;;
-       cmpxchg8.rel    r27 = [r25], r27, ar.ccv
+
+       rsm             psr.dt
+       add             r29=16,r19              // Clobber restart token
+       mov             r30=r24
        ;;
-       cmp.ne  p12, p0 = r26, r27
-(p12)  br.cond.spnt.few .ptc_g_0
+       srlz.d
+       add             r31=8,r24
        ;;
-       mov     ar.ccv = r24
 
-exception_save_restart:
+       // r18=pr, r19=length, r20=unat, r21=rsc, r22=iip, r23=TOS
+       // r29=delta
 {      .mmi
        st8             [r30]=r19,16            // length
        st8             [r31]=r0,16             // flags
-       add             r29=16,r19              // Clobber restart token
        ;;
 }
 {      .mmi
@@ -218,6 +208,7 @@ exception_save_restart:
        ;;
 }
        // r18=pr, r19=rnat, r20=bspstore, r21=rsc, r22=iip, r23=rp
+       // r24=pfs
 {      .mmi
        st8             [r30]=r23,16            // rp
        st8             [r31]=r18,16            // pr
@@ -275,7 +266,7 @@ exception_save_restart:
        sub             r18=r18,r20
        ;;
 }
-       // r19=ifs, r22=iip
+       // r18=ndirty, r19=ifs, r22=iip
 {      .mmi
        st8             [r31]=r18,16            // ndirty
        st8             [r30]=r19,16            // cfm
@@ -431,27 +422,10 @@ exception_save_restart:
        ;;
 }
 {      .mlx
-       ssm             psr.ic|psr.dfh
+       ssm             psr.dt|psr.ic|psr.dfh
        movl            gp=__gp
        ;;
 }
-
-       /* PTC.G leave non-exclusive */
-       srlz.d
-       movl    r25 = pmap_ptc_g_sem
-       ;;
-.ptc_g_1:
-       ld8.acq r26 = [r25]
-       ;;
-       mov     ar.ccv = r26
-       adds    r27 = -1, r26
-       ;;
-       cmpxchg8.rel    r27 = [r25], r27, ar.ccv
-       ;;
-       cmp.ne  p12, p0 = r26, r27
-(p12)  br.cond.spnt.few .ptc_g_1
-       ;;
-
 {      .mib
        srlz.d
        nop             0
@@ -469,34 +443,52 @@ END(exception_save)
 ENTRY_NOPROFILE(exception_restore, 0)
 {      .mmi
        rsm             psr.i
-       add             r3=SIZEOF_TRAPFRAME-16,sp
-       add             r2=SIZEOF_TRAPFRAME,sp
+       add             sp=16,sp
+       nop             0
        ;;
 }
-{      .mmi
+
+       // The next instruction can fault. Let it be...
+       tpa             r9=sp
+       ;;
+       rsm             psr.dt|psr.ic
+       add             r8=SIZEOF_SPECIAL+16,r9
+       ;;
        srlz.d
-       add             r8=SIZEOF_SPECIAL+32,sp
-       nop             0
+       add             r2=SIZEOF_TRAPFRAME-16,r9
+       add             r3=SIZEOF_TRAPFRAME-32,r9
        ;;
-}
-       // The next load can trap. Let it be...
+
+{      .mmi
        ldf.fill        f15=[r2],-32            // f15
        ldf.fill        f14=[r3],-32            // f14
-       add             sp=16,sp
+       nop             0
        ;;
+}
+{      .mmi
        ldf.fill        f13=[r2],-32            // f13
        ldf.fill        f12=[r3],-32            // f12
+       nop             0
        ;;
+}
+{      .mmi
        ldf.fill        f11=[r2],-32            // f11
        ldf.fill        f10=[r3],-32            // f10
+       nop             0
        ;;
+}
+{      .mmi
        ldf.fill        f9=[r2],-32             // f9
        ldf.fill        f8=[r3],-32             // f8
+       nop             0
        ;;
+}
+{      .mmi
        ldf.fill        f7=[r2],-24             // f7
        ldf.fill        f6=[r3],-16             // f6
+       nop             0
        ;;
-
+}
 {      .mmi
        ld8             r8=[r8]                 // unat (after)
        ;;
@@ -553,53 +545,53 @@ ENTRY_NOPROFILE(exception_restore, 0)
        bsw.0
        ;;
 }
+{      .mii
+       ld8             r16=[r9]                // tf_length
+       add             r31=16,r9
+       add             r30=24,r9
+}
 {      .mmi
        ld8.fill        r15=[r3],-16            // r15
        ld8.fill        r14=[r2],-16            // r14
-       add             r31=16,sp
+       nop             0
        ;;
 }
 {      .mmi
-       ld8             r16=[sp]                // tf_length
        ld8.fill        r11=[r3],-16            // r11
-       add             r30=24,sp
-       ;;
-}
-{      .mmi
        ld8.fill        r10=[r2],-16            // r10
-       ld8.fill        r9=[r3],-16             // r9
        add             r16=r16,sp              // ar.k7
        ;;
 }
 {      .mmi
+       ld8.fill        r9=[r3],-16             // r9
        ld8.fill        r8=[r2],-16             // r8
-       ld8.fill        r3=[r3]                 // r3
+       nop             0
        ;;
 }
-       // We want nested TLB faults from here on...
-       rsm             psr.ic|psr.i
+{      .mmi
+       ld8.fill        r3=[r3]                 // r3
        ld8.fill        r2=[r2]                 // r2
        nop             0
        ;;
-       srlz.d
-       ld8.fill        sp=[r31],16             // sp
-       nop             0
-       ;;
+}
 
+       ld8.fill        sp=[r31],16             // sp
        ld8             r17=[r30],16            // unat
-       ld8             r29=[r31],16            // rp
        ;;
+       ld8             r29=[r31],16            // rp
        ld8             r18=[r30],16            // pr
+       ;;
        ld8             r28=[r31],16            // pfs
+       ld8             r20=[r30],24            // bspstore
        mov             rp=r29
        ;;
-       ld8             r20=[r30],24            // bspstore
        ld8             r21=[r31],24            // rnat
        mov             ar.pfs=r28
        ;;
        ld8.fill        r26=[r30],16            // tp
        ld8             r22=[r31],16            // rsc
        ;;
+
 {      .mmi
        ld8             r23=[r30],16            // fpsr
        ld8             r24=[r31],16            // psr
@@ -636,6 +628,11 @@ ENTRY_NOPROFILE(exception_restore, 0)
        addl            r29=NTLBRT_RESTORE,r0   // 22-bit restart token 
        ;;
 }
+
+       ssm             psr.dt
+       ;;
+       srlz.d
+
 exception_restore_restart:
 {      .mmi
        mov             r30=ar.bspstore
@@ -1015,15 +1012,33 @@ IVT_ENTRY(Data_Nested_TLB, 0x1400)
        // here are direct mapped region 7 addresses, we have no problem
        // constructing physical addresses.
 
-{      .mlx
+{      .mmi
+       mov             cr.ifa=r30
+       mov             r26=rr[r30]
+       extr.u          r27=r30,61,3
+       ;;
+}
+{      .mii
        nop             0
-       movl            r27=ia64_kptdir
+       dep             r26=0,r26,0,2
+       cmp.eq          p12,p13=7,r27
        ;;
 }
 {      .mii
-       ld8             r27=[r27]
-       extr.u          r28=r30,3*PAGE_SHIFT-8, PAGE_SHIFT-3    // dir L0 index
-       extr.u          r26=r30,2*PAGE_SHIFT-5, PAGE_SHIFT-3    // dir L1 index
+       mov             cr.itir=r26
+(p12)  dep             r28=0,r30,61,3
+(p13)  extr.u          r28=r30,3*PAGE_SHIFT-8, PAGE_SHIFT-3    // dir L0 index
+       ;;
+}
+{      .mlx
+(p12)  add             
r28=PTE_PRESENT+PTE_ACCESSED+PTE_DIRTY+PTE_PL_KERN+PTE_AR_RWX+PTE_MA_WB,r28
+(p13)  movl            r27=ia64_kptdir
+       ;;
+}
+{      .mib
+(p13)  ld8             r27=[r27]
+(p13)  extr.u          r26=r30,2*PAGE_SHIFT-5, PAGE_SHIFT-3    // dir L1 index
+(p12)  br.cond.spnt.few 1f
        ;;
 }
 {      .mmi
@@ -1040,58 +1055,48 @@ IVT_ENTRY(Data_Nested_TLB, 0x1400)
        extr.u          r28=r30,PAGE_SHIFT,PAGE_SHIFT-5         // pte index
        ;;
 }
-{      .mmi
+{      .mii
        shladd          r27=r26,3,r27
+       shl             r28=r28,5
        ;;
-       mov             r26=rr[r30]
        dep             r27=0,r27,61,3
        ;;
 }
-{      .mii
        ld8             r27=[r27]                               // pte page
-       shl             r28=r28,5
-       dep             r26=0,r26,0,2
        ;;
-}
-{      .mmi
        add             r27=r28,r27
        ;;
-       mov             cr.ifa=r30
        dep             r27=0,r27,61,3
        ;;
-}
-{      .mmi
-       ld8             r28=[r27]               // pte
+       ld8             r28=[r27]                               // pte
        ;;
-       mov             cr.itir=r26
        or              r28=PTE_DIRTY+PTE_ACCESSED,r28
        ;;
-}
-{      .mmi
        st8             [r27]=r28
        ;;
-       addl            r26=NTLBRT_SAVE,r0
-       addl            r27=NTLBRT_RESTORE,r0
-}
+       ssm             psr.dt
+       ;;
+1:
 {      .mmi
        itc.d           r28
        ;;
-       ssm             psr.dt
-       cmp.eq          p12,p0=r29,r26
+       addl            r26=NTLBRT_SAVE,r0
+       addl            r27=NTLBRT_RESTORE,r0
        ;;
 }
-{      .mib
+{      .mmi
        srlz.d
+       cmp.eq          p12,p0=r29,r26
        cmp.eq          p13,p0=r29,r27
-(p12)  br.cond.sptk.few        exception_save_restart
        ;;
 }
-{      .mib
-       nop             0
+{      .mbb
        nop             0
+(p12)  br.cond.sptk.few        exception_save_restart
 (p13)  br.cond.sptk.few        exception_restore_restart
        ;;
 }
+
 {      .mlx
        mov             r26=ar.bsp
        movl            r29=kstack

Modified: head/sys/ia64/ia64/pmap.c
==============================================================================
--- head/sys/ia64/ia64/pmap.c   Thu Jun 30 19:23:17 2011        (r223699)
+++ head/sys/ia64/ia64/pmap.c   Thu Jun 30 20:34:55 2011        (r223700)
@@ -179,7 +179,7 @@ static uint64_t pmap_ptc_e_count2 = 2;
 static uint64_t pmap_ptc_e_stride1 = 0x2000;
 static uint64_t pmap_ptc_e_stride2 = 0x100000000;
 
-extern volatile u_long pmap_ptc_g_sem;
+struct mtx pmap_ptc_mutex;
 
 /*
  * Data for the RID allocator
@@ -338,6 +338,8 @@ pmap_bootstrap()
                       pmap_ptc_e_stride1,
                       pmap_ptc_e_stride2);
 
+       mtx_init(&pmap_ptc_mutex, "PTC.G mutex", NULL, MTX_SPIN);
+
        /*
         * Setup RIDs. RIDs 0..7 are reserved for the kernel.
         *
@@ -528,11 +530,11 @@ pmap_invalidate_page(vm_offset_t va)
 {
        struct ia64_lpte *pte;
        struct pcpu *pc;
-       uint64_t tag, sem;
-       register_t is;
+       uint64_t tag;
        u_int vhpt_ofs;
 
        critical_enter();
+
        vhpt_ofs = ia64_thash(va) - PCPU_GET(md.vhpt);
        tag = ia64_ttag(va);
        STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
@@ -540,34 +542,16 @@ pmap_invalidate_page(vm_offset_t va)
                atomic_cmpset_64(&pte->tag, tag, 1UL << 63);
        }
 
-       /* PTC.G enter exclusive */
-       is = intr_disable();
-
-       /* Atomically assert writer after all writers have gone. */
-       do {
-               /* Wait until there's no more writer. */
-               do {
-                       sem = atomic_load_acq_long(&pmap_ptc_g_sem);
-                       tag = sem | (1ul << 63);
-               } while (sem == tag);
-       } while (!atomic_cmpset_rel_long(&pmap_ptc_g_sem, sem, tag));
-
-       /* Wait until all readers are gone. */
-       tag = (1ul << 63);
-       do {
-               sem = atomic_load_acq_long(&pmap_ptc_g_sem);
-       } while (sem != tag);
+       mtx_lock_spin(&pmap_ptc_mutex);
 
        ia64_ptc_ga(va, PAGE_SHIFT << 2);
        ia64_mf();
        ia64_srlz_i();
 
-       /* PTC.G leave exclusive */
-       atomic_store_rel_long(&pmap_ptc_g_sem, 0);
+       mtx_unlock_spin(&pmap_ptc_mutex);
 
        ia64_invala();
 
-       intr_restore(is);
        critical_exit();
 }
 
_______________________________________________
[email protected] mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "[email protected]"

Reply via email to