Module Name:    src
Committed By:   mrg
Date:           Sun Mar 28 05:24:01 UTC 2010

Modified Files:
        src/sys/arch/sparc64/include: cpu.h
        src/sys/arch/sparc64/sparc64: cache.h ipifuncs.c pmap.c

Log Message:
- add a kmutex_t ci_ctx_lock to struct cpu_info, and initialise it in
  cpu_pmap_init() and replace pmap_ctx_lock usage with this new ci_ctx_lock

- replace smp_dcache_flush_page_all() with smp_dcache_flush_page_cpuset(),
  which flushes only on the specified set of CPUs, not everyone.  add new
  dcache_flush_page_cpuset() to flush this page from the D$ only
  on the specified set of CPUs.

- add a cpuset to pmap_free_page() and use it when freeing PTE pages
  when a pmap is destroyed

- introduce pmap_free_page_noflush(), and use it when we allocated a
  page for PTEs but didn't use it and don't need to flush it

- don't bother with pmap_lock in pmap_extract(), the only potential
  issue is pseg_get() which is already safe

tested on sb2000, sb2500 and ultra80 with a bunch of various heavy
workloads, and seems to give a clear 1-2% speed up for high-forking /
short lived processes, such as ./configure.


To generate a diff of this commit:
cvs rdiff -u -r1.89 -r1.90 src/sys/arch/sparc64/include/cpu.h
cvs rdiff -u -r1.17 -r1.18 src/sys/arch/sparc64/sparc64/cache.h
cvs rdiff -u -r1.35 -r1.36 src/sys/arch/sparc64/sparc64/ipifuncs.c
cvs rdiff -u -r1.261 -r1.262 src/sys/arch/sparc64/sparc64/pmap.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/sparc64/include/cpu.h
diff -u src/sys/arch/sparc64/include/cpu.h:1.89 src/sys/arch/sparc64/include/cpu.h:1.90
--- src/sys/arch/sparc64/include/cpu.h:1.89	Sat Mar  6 08:08:29 2010
+++ src/sys/arch/sparc64/include/cpu.h	Sun Mar 28 05:24:00 2010
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu.h,v 1.89 2010/03/06 08:08:29 mrg Exp $ */
+/*	$NetBSD: cpu.h,v 1.90 2010/03/28 05:24:00 mrg Exp $ */
 
 /*
  * Copyright (c) 1992, 1993
@@ -151,7 +151,10 @@
  * the right pointer and you get to the pmap segment tables.  These are
  * physical addresses, of course.
  *
+ * ci_ctx_lock protects this CPUs context allocation/free.
+ * These are all allocated almost with in the same cacheline.
  */
+	kmutex_t		ci_ctx_lock;
 	int			ci_pmap_next_ctx;
 	int			ci_numctx;
 	paddr_t 		*ci_ctxbusy;

Index: src/sys/arch/sparc64/sparc64/cache.h
diff -u src/sys/arch/sparc64/sparc64/cache.h:1.17 src/sys/arch/sparc64/sparc64/cache.h:1.18
--- src/sys/arch/sparc64/sparc64/cache.h:1.17	Mon Mar  8 08:59:06 2010
+++ src/sys/arch/sparc64/sparc64/cache.h	Sun Mar 28 05:24:00 2010
@@ -1,4 +1,4 @@
-/*	$NetBSD: cache.h,v 1.17 2010/03/08 08:59:06 mrg Exp $ */
+/*	$NetBSD: cache.h,v 1.18 2010/03/28 05:24:00 mrg Exp $ */
 
 /*
  * Copyright (c) 1996
@@ -144,14 +144,16 @@
 
 #ifdef MULTIPROCESSOR
 void smp_tlb_flush_pte(vaddr_t, struct pmap *);
-void smp_dcache_flush_page_all(paddr_t pa);
+void smp_dcache_flush_page_cpuset(paddr_t pa, sparc64_cpuset_t);
 void smp_blast_dcache(sparc64_cpuset_t);
 #define	tlb_flush_pte(va,pm	)	smp_tlb_flush_pte(va, pm)
-#define	dcache_flush_page_all(pa)	smp_dcache_flush_page_all(pa)
+#define	dcache_flush_page_all(pa)	smp_dcache_flush_page_cpuset(pa, cpus_active)
+#define	dcache_flush_page_cpuset(pa,cs)	smp_dcache_flush_page_cpuset(pa, cs)
 #define	blast_dcache()			smp_blast_dcache(cpus_active)
 #else
 #define	tlb_flush_pte(va,pm)		sp_tlb_flush_pte(va, (pm)->pm_ctx[0])
 #define	dcache_flush_page_all(pa)	dcache_flush_page(pa)
+#define	dcache_flush_page_cpuset(pa,cs)	dcache_flush_page(pa)
 #define	blast_dcache()			sp_blast_dcache(dcache_size, \
 							dcache_line_size)
 #endif

Index: src/sys/arch/sparc64/sparc64/ipifuncs.c
diff -u src/sys/arch/sparc64/sparc64/ipifuncs.c:1.35 src/sys/arch/sparc64/sparc64/ipifuncs.c:1.36
--- src/sys/arch/sparc64/sparc64/ipifuncs.c:1.35	Mon Mar  8 08:59:06 2010
+++ src/sys/arch/sparc64/sparc64/ipifuncs.c	Sun Mar 28 05:24:00 2010
@@ -1,4 +1,4 @@
-/*	$NetBSD: ipifuncs.c,v 1.35 2010/03/08 08:59:06 mrg Exp $ */
+/*	$NetBSD: ipifuncs.c,v 1.36 2010/03/28 05:24:00 mrg Exp $ */
 
 /*-
  * Copyright (c) 2004 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ipifuncs.c,v 1.35 2010/03/08 08:59:06 mrg Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ipifuncs.c,v 1.36 2010/03/28 05:24:00 mrg Exp $");
 
 #include "opt_ddb.h"
 
@@ -412,10 +412,10 @@
 }
 
 /*
- * Make sure this page is flushed from all CPUs.
+ * Make sure this page is flushed from all/some CPUs.
  */
 void
-smp_dcache_flush_page_all(paddr_t pa)
+smp_dcache_flush_page_cpuset(paddr_t pa, sparc64_cpuset_t activecpus)
 {
 	ipifunc_t func;
 
@@ -424,7 +424,7 @@
 	else
 		func = sparc64_ipi_dcache_flush_page_us;
 
-	sparc64_broadcast_ipi(func, pa, dcache_line_size);
+	sparc64_multicast_ipi(activecpus, func, pa, dcache_line_size);
 	dcache_flush_page(pa);
 }
 

Index: src/sys/arch/sparc64/sparc64/pmap.c
diff -u src/sys/arch/sparc64/sparc64/pmap.c:1.261 src/sys/arch/sparc64/sparc64/pmap.c:1.262
--- src/sys/arch/sparc64/sparc64/pmap.c:1.261	Sun Mar 21 22:38:08 2010
+++ src/sys/arch/sparc64/sparc64/pmap.c	Sun Mar 28 05:24:00 2010
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap.c,v 1.261 2010/03/21 22:38:08 mrg Exp $	*/
+/*	$NetBSD: pmap.c,v 1.262 2010/03/28 05:24:00 mrg Exp $	*/
 /*
  *
  * Copyright (C) 1996-1999 Eduardo Horvath.
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.261 2010/03/21 22:38:08 mrg Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.262 2010/03/28 05:24:00 mrg Exp $");
 
 #undef	NO_VCACHE /* Don't forget the locked TLB in dostart */
 #define	HWREF
@@ -323,14 +323,14 @@
 
 #define pv_check()
 
-static int pmap_get_page(paddr_t *p);
-static void pmap_free_page(paddr_t pa);
+static int pmap_get_page(paddr_t *);
+static void pmap_free_page(paddr_t, sparc64_cpuset_t);
+static void pmap_free_page_noflush(paddr_t);
 
 /*
- * Global pmap lock.
+ * Global pmap locks.
  */
 static kmutex_t pmap_lock;
-static kmutex_t pmap_ctx_lock;
 static bool lock_available = false;
 
 /*
@@ -1226,6 +1226,7 @@
 {
 	size_t ctxsize;
 
+	mutex_init(&ci->ci_ctx_lock, MUTEX_SPIN, IPL_VM);
 	ci->ci_pmap_next_ctx = 1;
 #ifdef SUN4V
 #error find out if we have 16 or 13 bit context ids
@@ -1295,7 +1296,6 @@
 	vm_num_phys = avail_end - avail_start;
 
 	mutex_init(&pmap_lock, MUTEX_DEFAULT, IPL_NONE);
-	mutex_init(&pmap_ctx_lock, MUTEX_SPIN, IPL_VM);
 #if defined(USE_LOCKSAFE_PSEG_GETSET)
 	mutex_init(&pseg_lock, MUTEX_SPIN, IPL_VM);
 #endif
@@ -1410,6 +1410,9 @@
 {
 #ifdef MULTIPROCESSOR
 	struct cpu_info *ci;
+	sparc64_cpuset_t pmap_cpus_active;
+#else
+#define pmap_cpus_active 0
 #endif
 	struct vm_page *pg, *nextpg;
 
@@ -1417,26 +1420,36 @@
 		return;
 	}
 	DPRINTF(PDB_DESTROY, ("pmap_destroy: freeing pmap %p\n", pm));
-	mutex_enter(&pmap_ctx_lock);
 #ifdef MULTIPROCESSOR
+	CPUSET_CLEAR(pmap_cpus_active);
 	for (ci = cpus; ci != NULL; ci = ci->ci_next) {
-		if (CPUSET_HAS(cpus_active, ci->ci_index))
-			ctx_free(pm, ci);
+		/* XXXMRG: Move the lock inside one or both tests? */
+		mutex_enter(&ci->ci_ctx_lock);
+		if (CPUSET_HAS(cpus_active, ci->ci_index)) {
+			if (pm->pm_ctx[ci->ci_index] > 0) {
+				CPUSET_ADD(pmap_cpus_active, ci->ci_index);
+				ctx_free(pm, ci);
+			}
+		}
+		mutex_exit(&ci->ci_ctx_lock);
 	}
 #else
-	ctx_free(pm, curcpu());
+	if (pmap_ctx(pm)) {
+		mutex_enter(&curcpu()->ci_ctx_lock);
+		ctx_free(pm, curcpu());
+		mutex_exit(&curcpu()->ci_ctx_lock);
+	}
 #endif
-	mutex_exit(&pmap_ctx_lock);
 
 	/* we could be a little smarter and leave pages zeroed */
 	for (pg = TAILQ_FIRST(&pm->pm_obj.memq); pg != NULL; pg = nextpg) {
 		nextpg = TAILQ_NEXT(pg, listq.queue);
 		TAILQ_REMOVE(&pm->pm_obj.memq, pg, listq.queue);
 		KASSERT(pg->mdpage.mdpg_pvh.pv_pmap == NULL);
-		dcache_flush_page_all(VM_PAGE_TO_PHYS(pg));
+		dcache_flush_page_cpuset(VM_PAGE_TO_PHYS(pg), pmap_cpus_active);
 		uvm_pagefree(pg);
 	}
-	pmap_free_page((paddr_t)(u_long)pm->pm_segs);
+	pmap_free_page((paddr_t)(u_long)pm->pm_segs, pmap_cpus_active);
 	UVM_OBJ_DESTROY(&pm->pm_obj);
 	pool_cache_put(&pmap_cache, pm);
 }
@@ -1555,7 +1568,7 @@
 		/* We allocated a spare page but didn't use it.  Free it. */
 		printf("pmap_kenter_pa: freeing unused page %llx\n",
 		       (long long)ptp);
-		pmap_free_page(ptp);
+		pmap_free_page_noflush(ptp);
 	}
 #ifdef DEBUG
 	i = ptelookup_va(va);
@@ -1826,7 +1839,7 @@
 		/* We allocated a spare page but didn't use it.  Free it. */
 		printf("pmap_enter: freeing unused page %llx\n",
 		       (long long)ptp);
-		pmap_free_page(ptp);
+		pmap_free_page_noflush(ptp);
 	}
 	if (dopv) {
 		pmap_enter_pv(pm, va, pa, pg, npv);
@@ -1924,22 +1937,36 @@
 	write_user_windows();
 	pm->pm_refs = 0;
 
-	mutex_enter(&pmap_ctx_lock);
+	/*
+	 * XXXMRG: pmap_destroy() does exactly the same dance here.
+	 * surely one of them isn't necessary?
+	 */
 #ifdef MULTIPROCESSOR
 	CPUSET_CLEAR(pmap_cpus_active);
 	for (ci = cpus; ci != NULL; ci = ci->ci_next) {
+		/* XXXMRG: Move the lock inside one or both tests? */
+		mutex_enter(&ci->ci_ctx_lock);
 		if (CPUSET_HAS(cpus_active, ci->ci_index)) {
-			if (pm->pm_ctx[ci->ci_index] > 0)
+			if (pm->pm_ctx[ci->ci_index] > 0) {
 				CPUSET_ADD(pmap_cpus_active, ci->ci_index);
-			ctx_free(pm, ci);
+				ctx_free(pm, ci);
+			}
 		}
+		mutex_exit(&ci->ci_ctx_lock);
 	}
 #else
-	ctx_free(pm, curcpu());
+	if (pmap_ctx(pm)) {
+		mutex_enter(&curcpu()->ci_ctx_lock);
+		ctx_free(pm, curcpu());
+		mutex_exit(&curcpu()->ci_ctx_lock);
+	}
 #endif
-	mutex_exit(&pmap_ctx_lock);
 
 	REMOVE_STAT(flushes);
+	/*
+	 * XXXMRG: couldn't we do something less severe here, and
+	 * only flush the right context on each CPU?
+	 */
 #ifdef MULTIPROCESSOR
 	smp_blast_dcache(pmap_cpus_active);
 #else
@@ -2021,7 +2048,8 @@
 			continue;
 
 		/*
-		 * if the pmap is being torn down, don't bother flushing.
+		 * if the pmap is being torn down, don't bother flushing,
+		 * we already have done so.
 		 */
 
 		if (!pm->pm_refs)
@@ -2166,9 +2194,6 @@
 			*pap = pa;
 		return TRUE;
 	} else {
-		if (pm != pmap_kernel()) {
-			mutex_enter(&pmap_lock);
-		}
 		data = pseg_get(pm, va);
 		pa = data & TLB_PA_MASK;
 #ifdef DEBUG
@@ -2200,9 +2225,6 @@
 			printf(" pseg_get: %lx\n", (long)pa);
 		}
 #endif
-		if (pm != pmap_kernel()) {
-			mutex_exit(&pmap_lock);
-		}
 	}
 	if ((data & TLB_V) == 0)
 		return (FALSE);
@@ -3073,7 +3095,7 @@
 
 	KASSERT(pm != pmap_kernel());
 	KASSERT(pm == curproc->p_vmspace->vm_map.pmap);
-	mutex_enter(&pmap_ctx_lock);
+	mutex_enter(&curcpu()->ci_ctx_lock);
 	ctx = curcpu()->ci_pmap_next_ctx++;
 
 	/*
@@ -3108,7 +3130,7 @@
 	curcpu()->ci_ctxbusy[ctx] = pm->pm_physaddr;
 	LIST_INSERT_HEAD(&curcpu()->ci_pmap_ctxlist, pm, pm_list[cpu_number()]);
 	pmap_ctx(pm) = ctx;
-	mutex_exit(&pmap_ctx_lock);
+	mutex_exit(&curcpu()->ci_ctx_lock);
 	DPRINTF(PDB_CTX_ALLOC, ("ctx_alloc: cpu%d allocated ctx %d\n",
 		cpu_number(), ctx));
 	return ctx;
@@ -3123,7 +3145,7 @@
 	int oldctx;
 	int cpunum;
 
-	KASSERT(mutex_owned(&pmap_ctx_lock));
+	KASSERT(mutex_owned(&curcpu()->ci_ctx_lock));
 
 #ifdef MULTIPROCESSOR
 	cpunum = ci->ci_index;
@@ -3382,14 +3404,21 @@
 }
 
 static void
-pmap_free_page(paddr_t pa)
+pmap_free_page(paddr_t pa, sparc64_cpuset_t cs)
 {
 	struct vm_page *pg = PHYS_TO_VM_PAGE(pa);
 
-	dcache_flush_page_all(pa);
+	dcache_flush_page_cpuset(pa, cs);
 	uvm_pagefree(pg);
 }
 
+static void
+pmap_free_page_noflush(paddr_t pa)
+{
+	struct vm_page *pg = PHYS_TO_VM_PAGE(pa);
+
+	uvm_pagefree(pg);
+}
 
 #ifdef DDB
 
@@ -3609,7 +3638,7 @@
 
 	pmap_remove(pmap_kernel(), va, va+1);
 	pmap_update(pmap_kernel());
-	pmap_free_page(pa);
+	pmap_free_page(pa, cpus_active);
 }
 #endif
 

Reply via email to