Module Name: src Committed By: martin Date: Mon Jun 15 07:48:08 UTC 2015
Modified Files: src/sys/arch/sparc64/include: cpu.h src/sys/arch/sparc64/sparc64: mp_subr.S pmap.c Log Message: Slightly optimize the ITLB usage on secondary processors. To generate a diff of this commit: cvs rdiff -u -r1.114 -r1.115 src/sys/arch/sparc64/include/cpu.h cvs rdiff -u -r1.8 -r1.9 src/sys/arch/sparc64/sparc64/mp_subr.S cvs rdiff -u -r1.295 -r1.296 src/sys/arch/sparc64/sparc64/pmap.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/arch/sparc64/include/cpu.h diff -u src/sys/arch/sparc64/include/cpu.h:1.114 src/sys/arch/sparc64/include/cpu.h:1.115 --- src/sys/arch/sparc64/include/cpu.h:1.114 Sat Jan 3 11:22:14 2015 +++ src/sys/arch/sparc64/include/cpu.h Mon Jun 15 07:48:08 2015 @@ -1,4 +1,4 @@ -/* $NetBSD: cpu.h,v 1.114 2015/01/03 11:22:14 palle Exp $ */ +/* $NetBSD: cpu.h,v 1.115 2015/06/15 07:48:08 martin Exp $ */ /* * Copyright (c) 1992, 1993 @@ -268,7 +268,7 @@ extern vaddr_t cpu_spinup_trampoline; extern char *mp_tramp_code; extern u_long mp_tramp_code_len; -extern u_long mp_tramp_tlb_slots; +extern u_long mp_tramp_dtlb_slots, mp_tramp_itlb_slots; extern u_long mp_tramp_func; extern u_long mp_tramp_ci; Index: src/sys/arch/sparc64/sparc64/mp_subr.S diff -u src/sys/arch/sparc64/sparc64/mp_subr.S:1.8 src/sys/arch/sparc64/sparc64/mp_subr.S:1.9 --- src/sys/arch/sparc64/sparc64/mp_subr.S:1.8 Sun Nov 2 19:40:06 2014 +++ src/sys/arch/sparc64/sparc64/mp_subr.S Mon Jun 15 07:48:08 2015 @@ -1,4 +1,4 @@ -/* $NetBSD: mp_subr.S,v 1.8 2014/11/02 19:40:06 palle Exp $ */ +/* $NetBSD: mp_subr.S,v 1.9 2015/06/15 07:48:08 martin Exp $ */ /* * Copyright (c) 2006-2010 Matthew R. Green @@ -214,19 +214,20 @@ ENTRY(sparc64_ipi_flush_pte_sun4v) .text .align 32 1: rd %pc, %l0 - LDULNG [%l0 + (4f-1b)], %l1 ! Load tlb slot count + LDULNG [%l0 + (3f-1b)], %l1 ! Load itlb slot count LDULNG [%l0 + (7f-1b)], %g2 ! Load cpu_args address. add %l0, (6f-1b), %l2 ! tlb slots ld [%g2 + CBA_CPUTYP], %g3 ! Load cputype clr %l3 -2: cmp %l3, %l1 - be CCCR, 3f +.Litlb_loop: + cmp %l3, %l1 + be CCCR, .Litlb_done nop ldx [%l2 + TTE_VPN], %l4 ldx [%l2 + TTE_DATA], %l5 #ifdef SUN4V cmp %g3, CPU_SUN4V - bne,pt %icc, 8f + bne,pt %icc, .Litlb_4u nop ! sun4v mov %l4, %o0 ! vaddr @@ -236,10 +237,10 @@ ENTRY(sparc64_ipi_flush_pte_sun4v) mov FT_MMU_MAP_PERM_ADDR, %o5 ! hv fast trap function ta ST_FAST_TRAP cmp %o0, 0 - be,pt %icc, 9f + be,pt %icc, .Litlb_next nop sir ! 
crash if mapping fails -8: +.Litlb_4u: #endif ! sun4u wr %g0, ASI_DMMU, %asi @@ -248,19 +249,60 @@ ENTRY(sparc64_ipi_flush_pte_sun4v) wr %g0, ASI_IMMU, %asi stxa %l4, [%g0 + TLB_TAG_ACCESS] %asi stxa %l5, [%g0] ASI_IMMU_DATA_IN -9: +.Litlb_next: membar #Sync flush %l4 add %l2, PTE_SIZE, %l2 add %l3, 1, %l3 - ba %xcc, 2b + ba %xcc, .Litlb_loop nop -3: LDULNG [%l0 + (5f-1b)], %l1 ! Load function +.Litlb_done: + ! continue the same loop (with indices and pointers et al), + ! but load a new upper limit and do not push the entries into + ! the itlb + LDULNG [%l0 + (4f-1b)], %l1 ! Load dtlb slot count +.Ldtlb_loop: + cmp %l3, %l1 + be CCCR, .Ldtlb_done + nop + ldx [%l2 + TTE_VPN], %l4 + ldx [%l2 + TTE_DATA], %l5 +#ifdef SUN4V + cmp %g3, CPU_SUN4V + bne,pt %icc, .Ldtlb_4u + nop + ! sun4v + mov %l4, %o0 ! vaddr + clr %o1 ! reserved + mov %l5, %o2 ! tte + mov MAP_DTLB, %o3 ! flags + mov FT_MMU_MAP_PERM_ADDR, %o5 ! hv fast trap function + ta ST_FAST_TRAP + cmp %o0, 0 + be,pt %icc, .Ldtlb_next + nop + sir ! crash if mapping fails +.Ldtlb_4u: +#endif + ! sun4u + wr %g0, ASI_DMMU, %asi + stxa %l4, [%g0 + TLB_TAG_ACCESS] %asi + stxa %l5, [%g0] ASI_DMMU_DATA_IN +.Ldtlb_next: + membar #Sync + flush %l4 + add %l2, PTE_SIZE, %l2 + add %l3, 1, %l3 + ba %xcc, .Ldtlb_loop + nop +.Ldtlb_done: + LDULNG [%l0 + (5f-1b)], %l1 ! 
Load function jmpl %l1, %g0 nop .align PTRSZ 4: ULONG 0x0 +3: ULONG 0x0 5: ULONG 0x0 7: ULONG 0x0 _ALIGN @@ -276,8 +318,10 @@ DATA(mp_tramp_code) POINTER 1b DATA(mp_tramp_code_len) ULONG 6b-1b -DATA(mp_tramp_tlb_slots) +DATA(mp_tramp_dtlb_slots) ULONG 4b-1b +DATA(mp_tramp_itlb_slots) + ULONG 3b-1b DATA(mp_tramp_func) ULONG 5b-1b DATA(mp_tramp_ci) Index: src/sys/arch/sparc64/sparc64/pmap.c diff -u src/sys/arch/sparc64/sparc64/pmap.c:1.295 src/sys/arch/sparc64/sparc64/pmap.c:1.296 --- src/sys/arch/sparc64/sparc64/pmap.c:1.295 Sun Jun 14 19:05:27 2015 +++ src/sys/arch/sparc64/sparc64/pmap.c Mon Jun 15 07:48:08 2015 @@ -1,4 +1,4 @@ -/* $NetBSD: pmap.c,v 1.295 2015/06/14 19:05:27 martin Exp $ */ +/* $NetBSD: pmap.c,v 1.296 2015/06/15 07:48:08 martin Exp $ */ /* * * Copyright (C) 1996-1999 Eduardo Horvath. @@ -26,7 +26,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.295 2015/06/14 19:05:27 martin Exp $"); +__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.296 2015/06/15 07:48:08 martin Exp $"); #undef NO_VCACHE /* Don't forget the locked TLB in dostart */ #define HWREF @@ -525,7 +525,8 @@ pmap_mp_init(void) } memcpy(v, mp_tramp_code, mp_tramp_code_len); - *(u_long *)(v + mp_tramp_tlb_slots) = kernel_dtlb_slots; + *(u_long *)(v + mp_tramp_dtlb_slots) = kernel_dtlb_slots; + *(u_long *)(v + mp_tramp_itlb_slots) = kernel_itlb_slots; *(u_long *)(v + mp_tramp_func) = (u_long)cpu_mp_startup; *(u_long *)(v + mp_tramp_ci) = (u_long)cpu_args; tp = (pte_t *)(v + mp_tramp_code_len);