Module Name:    src
Committed By:   martin
Date:           Mon Jun 15 07:48:08 UTC 2015

Modified Files:
        src/sys/arch/sparc64/include: cpu.h
        src/sys/arch/sparc64/sparc64: mp_subr.S pmap.c

Log Message:
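Slightly optimize the ITLB usage on secondary processors: the MP
trampoline now enters only the first kernel_itlb_slots kernel mappings
into both the ITLB and the DTLB, and the remaining ones (up to
kernel_dtlb_slots) into the DTLB only, instead of entering all
kernel_dtlb_slots mappings into both TLBs.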


To generate a diff of this commit:
cvs rdiff -u -r1.114 -r1.115 src/sys/arch/sparc64/include/cpu.h
cvs rdiff -u -r1.8 -r1.9 src/sys/arch/sparc64/sparc64/mp_subr.S
cvs rdiff -u -r1.295 -r1.296 src/sys/arch/sparc64/sparc64/pmap.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/sparc64/include/cpu.h
diff -u src/sys/arch/sparc64/include/cpu.h:1.114 src/sys/arch/sparc64/include/cpu.h:1.115
--- src/sys/arch/sparc64/include/cpu.h:1.114	Sat Jan  3 11:22:14 2015
+++ src/sys/arch/sparc64/include/cpu.h	Mon Jun 15 07:48:08 2015
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu.h,v 1.114 2015/01/03 11:22:14 palle Exp $ */
+/*	$NetBSD: cpu.h,v 1.115 2015/06/15 07:48:08 martin Exp $ */
 
 /*
  * Copyright (c) 1992, 1993
@@ -268,7 +268,7 @@ extern vaddr_t cpu_spinup_trampoline;
 
 extern  char   *mp_tramp_code;
 extern  u_long  mp_tramp_code_len;
-extern  u_long  mp_tramp_tlb_slots;
+extern  u_long  mp_tramp_dtlb_slots, mp_tramp_itlb_slots;
 extern  u_long  mp_tramp_func;
 extern  u_long  mp_tramp_ci;
 

Index: src/sys/arch/sparc64/sparc64/mp_subr.S
diff -u src/sys/arch/sparc64/sparc64/mp_subr.S:1.8 src/sys/arch/sparc64/sparc64/mp_subr.S:1.9
--- src/sys/arch/sparc64/sparc64/mp_subr.S:1.8	Sun Nov  2 19:40:06 2014
+++ src/sys/arch/sparc64/sparc64/mp_subr.S	Mon Jun 15 07:48:08 2015
@@ -1,4 +1,4 @@
-/*	$NetBSD: mp_subr.S,v 1.8 2014/11/02 19:40:06 palle Exp $	*/
+/*	$NetBSD: mp_subr.S,v 1.9 2015/06/15 07:48:08 martin Exp $	*/
 
 /*
  * Copyright (c) 2006-2010 Matthew R. Green
@@ -214,19 +214,20 @@ ENTRY(sparc64_ipi_flush_pte_sun4v)
 	.text
 	.align 32
 1:	rd	%pc, %l0
-	LDULNG	[%l0 + (4f-1b)], %l1	! Load tlb slot count
+	LDULNG	[%l0 + (3f-1b)], %l1	! Load itlb slot count
 	LDULNG	[%l0 + (7f-1b)], %g2	! Load cpu_args address.
 	add	%l0, (6f-1b), %l2	! tlb slots
 	ld	[%g2 + CBA_CPUTYP], %g3 ! Load cputype
 	clr	%l3
-2:	cmp	%l3, %l1
-	be	CCCR, 3f
+.Litlb_loop:
+	cmp	%l3, %l1
+	be	CCCR, .Litlb_done
 	 nop
 	ldx	[%l2 + TTE_VPN], %l4
 	ldx	[%l2 + TTE_DATA], %l5
 #ifdef SUN4V	
 	cmp	%g3, CPU_SUN4V
-	bne,pt	%icc, 8f
+	bne,pt	%icc, .Litlb_4u
 	 nop
 	! sun4v
 	mov	%l4, %o0			! vaddr
@@ -236,10 +237,10 @@ ENTRY(sparc64_ipi_flush_pte_sun4v)
 	mov	FT_MMU_MAP_PERM_ADDR, %o5	! hv fast trap function
 	ta	ST_FAST_TRAP
 	cmp	%o0, 0
-	be,pt	%icc, 9f
+	be,pt	%icc, .Litlb_next
 	 nop
 	sir					! crash if mapping fails
-8:
+.Litlb_4u:
 #endif	
 	! sun4u
 	wr	%g0, ASI_DMMU, %asi
@@ -248,19 +249,60 @@ ENTRY(sparc64_ipi_flush_pte_sun4v)
 	wr	%g0, ASI_IMMU, %asi
 	stxa	%l4, [%g0 + TLB_TAG_ACCESS] %asi
 	stxa	%l5, [%g0] ASI_IMMU_DATA_IN
-9:		
+.Litlb_next:
 	membar	#Sync
 	flush	%l4
 	add	%l2, PTE_SIZE, %l2
 	add	%l3, 1, %l3
-	ba	%xcc, 2b
+	ba	%xcc, .Litlb_loop
 	 nop
-3:	LDULNG	[%l0 + (5f-1b)], %l1	! Load function
+.Litlb_done:
+	! continue the same loop (with indices and pointers et al),
+	! but load a new upper limit and do not push the entries into
+	! the itlb
+	LDULNG	[%l0 + (4f-1b)], %l1	! Load dtlb slot count
+.Ldtlb_loop:
+	cmp	%l3, %l1
+	be	CCCR, .Ldtlb_done
+	 nop
+	ldx	[%l2 + TTE_VPN], %l4
+	ldx	[%l2 + TTE_DATA], %l5
+#ifdef SUN4V	
+	cmp	%g3, CPU_SUN4V
+	bne,pt	%icc, .Ldtlb_4u
+	 nop
+	! sun4v
+	mov	%l4, %o0			! vaddr
+	clr	%o1				! reserved
+	mov	%l5, %o2			! tte
+	mov	MAP_DTLB, %o3			! flags
+	mov	FT_MMU_MAP_PERM_ADDR, %o5	! hv fast trap function
+	ta	ST_FAST_TRAP
+	cmp	%o0, 0
+	be,pt	%icc, .Ldtlb_next
+	 nop
+	sir					! crash if mapping fails
+.Ldtlb_4u:
+#endif	
+	! sun4u
+	wr	%g0, ASI_DMMU, %asi
+	stxa	%l4, [%g0 + TLB_TAG_ACCESS] %asi
+	stxa	%l5, [%g0] ASI_DMMU_DATA_IN
+.Ldtlb_next:
+	membar	#Sync
+	flush	%l4
+	add	%l2, PTE_SIZE, %l2
+	add	%l3, 1, %l3
+	ba	%xcc, .Ldtlb_loop
+	 nop
+.Ldtlb_done:
+	LDULNG	[%l0 + (5f-1b)], %l1	! Load function
 	jmpl	%l1, %g0
 	 nop
 
 	.align PTRSZ
 4:	ULONG	0x0
+3:	ULONG	0x0
 5:	ULONG	0x0
 7:	ULONG	0x0
 	_ALIGN
@@ -276,8 +318,10 @@ DATA(mp_tramp_code)
 	POINTER	1b
 DATA(mp_tramp_code_len)
 	ULONG	6b-1b
-DATA(mp_tramp_tlb_slots)
+DATA(mp_tramp_dtlb_slots)
 	ULONG	4b-1b
+DATA(mp_tramp_itlb_slots)
+	ULONG	3b-1b
 DATA(mp_tramp_func)
 	ULONG	5b-1b
 DATA(mp_tramp_ci)

Index: src/sys/arch/sparc64/sparc64/pmap.c
diff -u src/sys/arch/sparc64/sparc64/pmap.c:1.295 src/sys/arch/sparc64/sparc64/pmap.c:1.296
--- src/sys/arch/sparc64/sparc64/pmap.c:1.295	Sun Jun 14 19:05:27 2015
+++ src/sys/arch/sparc64/sparc64/pmap.c	Mon Jun 15 07:48:08 2015
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap.c,v 1.295 2015/06/14 19:05:27 martin Exp $	*/
+/*	$NetBSD: pmap.c,v 1.296 2015/06/15 07:48:08 martin Exp $	*/
 /*
  *
  * Copyright (C) 1996-1999 Eduardo Horvath.
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.295 2015/06/14 19:05:27 martin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.296 2015/06/15 07:48:08 martin Exp $");
 
 #undef	NO_VCACHE /* Don't forget the locked TLB in dostart */
 #define	HWREF
@@ -525,7 +525,8 @@ pmap_mp_init(void)
 	}
 
 	memcpy(v, mp_tramp_code, mp_tramp_code_len);
-	*(u_long *)(v + mp_tramp_tlb_slots) = kernel_dtlb_slots;
+	*(u_long *)(v + mp_tramp_dtlb_slots) = kernel_dtlb_slots;
+	*(u_long *)(v + mp_tramp_itlb_slots) = kernel_itlb_slots;
 	*(u_long *)(v + mp_tramp_func) = (u_long)cpu_mp_startup;
 	*(u_long *)(v + mp_tramp_ci) = (u_long)cpu_args;
 	tp = (pte_t *)(v + mp_tramp_code_len);

Reply via email to