Module Name:	src
Committed By:	matt
Date:		Thu Mar 11 08:19:01 UTC 2010
Modified Files:
	src/sys/arch/mips/include [matt-nb5-mips64]: cpu.h pmap.h vmparam.h
	src/sys/arch/mips/mips [matt-nb5-mips64]: cpu_subr.c pmap.c pmap_tlb.c
	    trap.c

Log Message:
Add MP-aware icache support.


To generate a diff of this commit:
cvs rdiff -u -r1.90.16.26 -r1.90.16.27 src/sys/arch/mips/include/cpu.h
cvs rdiff -u -r1.54.26.11 -r1.54.26.12 src/sys/arch/mips/include/pmap.h
cvs rdiff -u -r1.41.28.12 -r1.41.28.13 src/sys/arch/mips/include/vmparam.h
cvs rdiff -u -r1.1.2.3 -r1.1.2.4 src/sys/arch/mips/mips/cpu_subr.c
cvs rdiff -u -r1.179.16.21 -r1.179.16.22 src/sys/arch/mips/mips/pmap.c
cvs rdiff -u -r1.1.2.9 -r1.1.2.10 src/sys/arch/mips/mips/pmap_tlb.c
cvs rdiff -u -r1.217.12.21 -r1.217.12.22 src/sys/arch/mips/mips/trap.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
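For readers not following the branch: the heart of the change is a small piece of
per-TLB bookkeeping. When an executable page is modified, pmap_tlb_syncicache()
sets a bit (indexed by the page's icache way index) in ti_synci_page_bitmap for
each TLB whose CPUs have the page's pmap on-proc, and posts an AST or IPI; each
CPU then drains its bitmap in pmap_tlb_syncicache_ast() just before returning to
userland, syncing either individual icache indices or the whole icache if every
bit is set. The fragment below is a hypothetical, userspace-runnable reduction of
that logic for illustration only: the function names, the 32 KB way size, and the
use of plain (non-atomic) stores are assumptions of the sketch, not the kernel
API, and the real code holds the TLB lock, uses atomic_cas_32(), and calls
mips_icache_sync_all() / mips_icache_sync_range_index().

/*
 * Minimal sketch of the deferred icache-sync bookkeeping (illustrative
 * names; mirrors ti_synci_page_bitmap / pmap_tlb_synci_page_mask from
 * the diff but is not the actual kernel interface).
 */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	12			/* 4 KB pages (assumption) */
#define ICACHE_WAY_SIZE	(32 * 1024)		/* assumed primary icache way size */

static const uint32_t synci_page_mask = (ICACHE_WAY_SIZE >> PAGE_SHIFT) - 1;
static const uint32_t synci_map_mask  = ~(~0u << (ICACHE_WAY_SIZE >> PAGE_SHIFT));

static uint32_t synci_page_bitmap;		/* stands in for ti_synci_page_bitmap */

/* A writer of an exec page records which icache page index needs a sync. */
static void
mark_exec_page_dirty(uintptr_t va)
{
	synci_page_bitmap |= 1u << ((va >> PAGE_SHIFT) & synci_page_mask);
}

/* The AST handler drains the bitmap before the CPU returns to userland. */
static void
drain_synci_bitmap(void)
{
	uint32_t bitmap = synci_page_bitmap;
	synci_page_bitmap = 0;

	if (bitmap == synci_map_mask) {
		/* every index is dirty: cheaper to sync the whole icache */
		printf("sync entire icache\n");
		return;
	}
	for (uintptr_t va = 0; bitmap != 0; bitmap >>= 1, va += 1u << PAGE_SHIFT) {
		if (bitmap & 1)
			printf("sync icache index for va %#lx\n",
			    (unsigned long)va);
	}
}

int
main(void)
{
	mark_exec_page_dirty(0x00401000);
	mark_exec_page_dirty(0x00403000);
	drain_synci_bitmap();
	return 0;
}

Deferring the sync to the AST means the flush happens on whichever CPU actually
returns to userspace, and duplicate requests for the same page collapse into one
bit; the new evcnt counters in the diff (synci asts, pages synced, duplicates
skipped, deferred) exist to measure exactly that behaviour.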
Modified files: Index: src/sys/arch/mips/include/cpu.h diff -u src/sys/arch/mips/include/cpu.h:1.90.16.26 src/sys/arch/mips/include/cpu.h:1.90.16.27 --- src/sys/arch/mips/include/cpu.h:1.90.16.26 Thu Mar 11 08:16:59 2010 +++ src/sys/arch/mips/include/cpu.h Thu Mar 11 08:19:01 2010 @@ -1,4 +1,4 @@ -/* $NetBSD: cpu.h,v 1.90.16.26 2010/03/11 08:16:59 matt Exp $ */ +/* $NetBSD: cpu.h,v 1.90.16.27 2010/03/11 08:19:01 matt Exp $ */ /*- * Copyright (c) 1992, 1993 @@ -93,12 +93,17 @@ void *ci_fpsave_si; /* FP sync softint handler */ struct evcnt ci_evcnt_all_ipis; /* aggregated IPI counter */ struct evcnt ci_evcnt_per_ipi[NIPIS]; /* individual IPI counters*/ + struct evcnt ci_evcnt_synci_activate_rqst; + struct evcnt ci_evcnt_synci_onproc_rqst; + struct evcnt ci_evcnt_synci_deferred_rqst; + struct evcnt ci_evcnt_synci_ipi_rqst; #define CPUF_PRIMARY 0x01 /* CPU is primary CPU */ #define CPUF_PRESENT 0x02 /* CPU is present */ #define CPUF_RUNNING 0x04 /* CPU is running */ #define CPUF_PAUSED 0x08 /* CPU is paused */ #define CPUF_FPUSAVE 0x10 /* CPU is currently in fpusave_cpu() */ +#define CPUF_USERPMAP 0x20 /* CPU has a user pmap activated */ #endif }; Index: src/sys/arch/mips/include/pmap.h diff -u src/sys/arch/mips/include/pmap.h:1.54.26.11 src/sys/arch/mips/include/pmap.h:1.54.26.12 --- src/sys/arch/mips/include/pmap.h:1.54.26.11 Sat Feb 27 07:58:52 2010 +++ src/sys/arch/mips/include/pmap.h Thu Mar 11 08:19:01 2010 @@ -1,4 +1,4 @@ -/* $NetBSD: pmap.h,v 1.54.26.11 2010/02/27 07:58:52 matt Exp $ */ +/* $NetBSD: pmap.h,v 1.54.26.12 2010/03/11 08:19:01 matt Exp $ */ /* * Copyright (c) 1992, 1993 @@ -179,10 +179,10 @@ }; struct pmap_tlb_info { + char ti_name[8]; uint32_t ti_asid_hint; /* probable next ASID to use */ uint32_t ti_asids_free; /* # of ASIDs free */ #define tlbinfo_noasids_p(ti) ((ti)->ti_asids_free == 0) - u_long ti_asid_bitmap[MIPS_TLB_NUM_PIDS / (sizeof(u_long) * 8)]; kmutex_t *ti_lock; u_int ti_wired; /* # of wired TLB entries */ uint32_t ti_asid_mask; @@ -190,13 +190,21 @@ LIST_HEAD(, pmap_asid_info) ti_pais; /* list of active ASIDs */ #ifdef MULTIPROCESSOR pmap_t ti_victim; + uint32_t ti_synci_page_bitmap; /* page indices needing a syncicache */ uint32_t ti_cpu_mask; /* bitmask of CPUs sharing this TLB */ enum tlb_invalidate_op ti_tlbinvop; u_int ti_index; #define tlbinfo_index(ti) ((ti)->ti_index) + struct evcnt ti_evcnt_synci_asts; + struct evcnt ti_evcnt_synci_all; + struct evcnt ti_evcnt_synci_pages; + struct evcnt ti_evcnt_synci_deferred; + struct evcnt ti_evcnt_synci_desired; + struct evcnt ti_evcnt_synci_duplicate; #else #define tlbinfo_index(ti) (0) #endif + u_long ti_asid_bitmap[256 / (sizeof(u_long) * 8)]; }; @@ -211,6 +219,10 @@ extern struct pmap_kernel kernel_pmap_store; extern struct pmap_tlb_info pmap_tlb0_info; +#ifdef MULTIPROCESSOR +extern struct pmap_tlb_info *pmap_tlbs[MAXCPUS]; +extern u_int pmap_ntlbs; +#endif extern paddr_t mips_avail_start; extern paddr_t mips_avail_end; extern vaddr_t mips_virtual_end; @@ -221,17 +233,12 @@ #define pmap_phys_address(x) mips_ptob(x) -static __inline void -pmap_remove_all(struct pmap *pmap) -{ - /* Nothing. */ -} - /* * Bootstrap the system enough to run with virtual memory. 
*/ void pmap_bootstrap(void); +void pmap_remove_all(pmap_t); void pmap_set_modified(paddr_t); void pmap_procwr(struct proc *, vaddr_t, size_t); #define PMAP_NEED_PROCWR @@ -240,6 +247,9 @@ void pmap_tlb_shootdown_process(void); bool pmap_tlb_shootdown_bystanders(pmap_t pmap); void pmap_tlb_info_attach(struct pmap_tlb_info *, struct cpu_info *); +void pmap_tlb_syncicache_ast(struct cpu_info *); +void pmap_tlb_syncicache_wanted(struct cpu_info *); +void pmap_tlb_syncicache(vaddr_t, uint32_t); #endif void pmap_tlb_info_init(struct pmap_tlb_info *); void pmap_tlb_asid_acquire(pmap_t pmap, struct lwp *l); Index: src/sys/arch/mips/include/vmparam.h diff -u src/sys/arch/mips/include/vmparam.h:1.41.28.12 src/sys/arch/mips/include/vmparam.h:1.41.28.13 --- src/sys/arch/mips/include/vmparam.h:1.41.28.12 Tue Feb 23 20:33:47 2010 +++ src/sys/arch/mips/include/vmparam.h Thu Mar 11 08:19:01 2010 @@ -1,4 +1,4 @@ -/* $NetBSD: vmparam.h,v 1.41.28.12 2010/02/23 20:33:47 matt Exp $ */ +/* $NetBSD: vmparam.h,v 1.41.28.13 2010/03/11 08:19:01 matt Exp $ */ /* * Copyright (c) 1992, 1993 @@ -251,8 +251,8 @@ u_int pvh_attrs; /* page attributes */ #define VM_PAGE_PVLIST_LOCK_INIT(pg) do { } while (/*CONSTCOND*/ 0) #define VM_PAGE_PVLIST_LOCKED_P(pg) true -#define VM_PAGE_PVLIST_LOCK(pg, lc) (0) -#define VM_PAGE_PVLIST_UNLOCK(pg) do { } while (/*CONSTCOND*/ 0) +#define VM_PAGE_PVLIST_LOCK(pg, lc) (mutex_spin_enter(&pmap_pvlist_mutex), 0) +#define VM_PAGE_PVLIST_UNLOCK(pg) mutex_spin_exit(&pmap_pvlist_mutex) #define VM_PAGE_PVLIST_GEN(pg) (0) #endif }; Index: src/sys/arch/mips/mips/cpu_subr.c diff -u src/sys/arch/mips/mips/cpu_subr.c:1.1.2.3 src/sys/arch/mips/mips/cpu_subr.c:1.1.2.4 --- src/sys/arch/mips/mips/cpu_subr.c:1.1.2.3 Mon Mar 1 23:54:49 2010 +++ src/sys/arch/mips/mips/cpu_subr.c Thu Mar 11 08:19:01 2010 @@ -32,7 +32,7 @@ #include "opt_multiprocessor.h" #include "opt_sa.h" -__KERNEL_RCSID(0, "$NetBSD: cpu_subr.c,v 1.1.2.3 2010/03/01 23:54:49 matt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cpu_subr.c,v 1.1.2.4 2010/03/11 08:19:01 matt Exp $"); #include <sys/param.h> #include <sys/cpu.h> @@ -186,6 +186,18 @@ cpu_info_last->ci_next = ci; cpu_info_last = ci; } + evcnt_attach_dynamic(&ci->ci_evcnt_synci_activate_rqst, + EVCNT_TYPE_MISC, NULL, device_xname(self), + "syncicache activate request"); + evcnt_attach_dynamic(&ci->ci_evcnt_synci_deferred_rqst, + EVCNT_TYPE_MISC, NULL, device_xname(self), + "syncicache deferred request"); + evcnt_attach_dynamic(&ci->ci_evcnt_synci_ipi_rqst, + EVCNT_TYPE_MISC, NULL, device_xname(self), + "syncicache ipi request"); + evcnt_attach_dynamic(&ci->ci_evcnt_synci_onproc_rqst, + EVCNT_TYPE_MISC, NULL, device_xname(self), + "syncicache onproc request"); /* * Initialize IPI framework for this cpu instance @@ -472,7 +484,7 @@ KASSERT(lwp_locked(l, NULL)); KASSERT(l->l_stat == LSONPROC || l->l_stat == LSRUN); - l->l_md.md_astpending = 1; /* force call to ast() */ + l->l_md.md_astpending = 1; /* force call to ast() */ } void Index: src/sys/arch/mips/mips/pmap.c diff -u src/sys/arch/mips/mips/pmap.c:1.179.16.21 src/sys/arch/mips/mips/pmap.c:1.179.16.22 --- src/sys/arch/mips/mips/pmap.c:1.179.16.21 Sun Feb 28 23:45:06 2010 +++ src/sys/arch/mips/mips/pmap.c Thu Mar 11 08:19:01 2010 @@ -1,4 +1,4 @@ -/* $NetBSD: pmap.c,v 1.179.16.21 2010/02/28 23:45:06 matt Exp $ */ +/* $NetBSD: pmap.c,v 1.179.16.22 2010/03/11 08:19:01 matt Exp $ */ /*- * Copyright (c) 1998, 2001 The NetBSD Foundation, Inc. 
@@ -67,7 +67,7 @@ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.179.16.21 2010/02/28 23:45:06 matt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.179.16.22 2010/03/11 08:19:01 matt Exp $"); /* * Manages physical address maps. @@ -113,8 +113,13 @@ #include "opt_sysv.h" #include "opt_cputype.h" +#include "opt_multiprocessor.h" #include "opt_mips_cache.h" +#ifdef MULTIPROCESSOR +#define PMAP_NO_PV_UNCACHED +#endif + #include <sys/param.h> #include <sys/systm.h> #include <sys/proc.h> @@ -253,7 +258,9 @@ #define PMAP_SIZE offsetof(struct pmap, pm_pai[MAXCPUS]) #else #define PMAP_SIZE sizeof(struct pmap) +kmutex_t pmap_pvlist_mutex __aligned(COHERENCY_UNIT); #endif + struct pmap_kernel kernel_pmap_store = { .kernel_pmap = { .pm_count = 1, @@ -281,9 +288,7 @@ } poolpage; #endif -#ifdef MULTIPROCESSOR static void pmap_pvlist_lock_init(void); -#endif /* * The pools from which pmap structures and sub-structures are allocated. @@ -365,6 +370,22 @@ static inline void pmap_page_syncicache(struct vm_page *pg) { +#ifdef MULTIPROCESSOR + pv_entry_t pv = &pg->mdpage.pvh_first; + uint32_t onproc = 0; + (void)VM_PAGE_PVLIST_LOCK(pg, false); + if (pv->pv_pmap != NULL) { + for (; pv != NULL; pv = pv->pv_next) { + onproc |= pv->pv_pmap->pm_onproc; + if (onproc == cpus_running) + break; + } + } + VM_PAGE_PVLIST_UNLOCK(pg); + kpreempt_disable(); + pmap_tlb_syncicache(pg->mdpage.pvh_first.pv_va, onproc); + kpreempt_enable(); +#else if (MIPS_HAS_R4K_MMU) { if (PG_MD_CACHED_P(pg)) { mips_icache_sync_range_index( @@ -374,6 +395,7 @@ mips_icache_sync_range(MIPS_PHYS_TO_KSEG0(VM_PAGE_TO_PHYS(pg)), PAGE_SIZE); } +#endif } static vaddr_t @@ -514,9 +536,7 @@ (VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) / NBPG; } #endif -#ifdef MULTIPROCESSOR pmap_pvlist_lock_init(); -#endif /* * Now actually allocate the kernel PTE array (must be done @@ -809,10 +829,12 @@ PMAP_COUNT(activate); + kpreempt_disable(); pmap_tlb_asid_acquire(pmap, l); if (l == curlwp) { pmap_segtab_activate(pmap, l); } + kpreempt_enable(); } /* @@ -825,6 +847,7 @@ #ifdef MULTIPROCESSOR kpreempt_disable(); + curcpu()->ci_pmap_segbase = (void *)(MIPS_KSEG2_START + 0x1eadbeef); pmap_tlb_asid_deactivate(l->l_proc->p_vmspace->vm_map.pmap); kpreempt_enable(); #endif @@ -861,6 +884,7 @@ __func__, pmap, sva, eva, pte, flags); } #endif + KASSERT(kpreempt_disabled()); for (; sva < eva; sva += NBPG, pte++) { struct vm_page *pg; @@ -983,12 +1007,13 @@ */ if (pv->pv_pmap != NULL) { while (pv != NULL) { - const uint32_t gen = VM_PAGE_PVLIST_GEN(pg); + const pmap_t pmap = pv->pv_pmap; + const uint16_t gen = VM_PAGE_PVLIST_GEN(pg); va = pv->pv_va; VM_PAGE_PVLIST_UNLOCK(pg); - pmap_protect(pv->pv_pmap, va, va + PAGE_SIZE, - prot); - pmap_update(pv->pv_pmap); + pmap_protect(pmap, va, va + PAGE_SIZE, prot); + KASSERT(pv->pv_pmap == pmap); + pmap_update(pmap); if (gen != VM_PAGE_PVLIST_LOCK(pg, false)) { pv = &pg->mdpage.pvh_first; } else { @@ -1011,10 +1036,11 @@ (void)VM_PAGE_PVLIST_LOCK(pg, false); pv = &pg->mdpage.pvh_first; while (pv->pv_pmap != NULL) { + const pmap_t pmap = pv->pv_pmap; va = pv->pv_va; VM_PAGE_PVLIST_UNLOCK(pg); - pmap_remove(pv->pv_pmap, va, va + PAGE_SIZE); - pmap_update(pv->pv_pmap); + pmap_remove(pmap, va, va + PAGE_SIZE); + pmap_update(pmap); (void)VM_PAGE_PVLIST_LOCK(pg, false); } VM_PAGE_PVLIST_UNLOCK(pg); @@ -1027,6 +1053,7 @@ { const uint32_t pg_mask = ~(mips_pg_m_bit() | mips_pg_ro_bit()); const uint32_t p = (flags & VM_PROT_WRITE) ? 
mips_pg_rw_bit() : mips_pg_ro_bit(); + KASSERT(kpreempt_disabled()); /* * Change protection on every valid mapping within this segment. */ @@ -1083,6 +1110,7 @@ p = (prot & VM_PROT_WRITE) ? mips_pg_rw_bit() : mips_pg_ro_bit(); + kpreempt_disable(); if (pmap == pmap_kernel()) { /* * Change entries in kernel pmap. @@ -1107,6 +1135,7 @@ pte->pt_entry = pt_entry; pmap_tlb_update_addr(pmap, sva, pt_entry, true); } + kpreempt_enable(); return; } @@ -1129,7 +1158,6 @@ /* * Change protection on every valid mapping within this segment. */ - kpreempt_disable(); pmap_pte_process(pmap, sva, eva, pmap_pte_protect, p); kpreempt_enable(); } @@ -1209,6 +1237,7 @@ { const uint32_t newmode = cached ? MIPS3_PG_CACHED : MIPS3_PG_UNCACHED; + KASSERT(kpreempt_disabled()); #ifdef DEBUG if (pmapdebug & (PDB_FOLLOW|PDB_ENTER)) printf("pmap_page_uncache(%#"PRIxPADDR")\n", VM_PAGE_TO_PHYS(pg)); @@ -1402,6 +1431,7 @@ #endif #endif + kpreempt_disable(); if (pmap == pmap_kernel()) { if (pg) pmap_enter_pv(pmap, va, pg, &npte); @@ -1432,12 +1462,11 @@ /* * Update the same virtual address entry. */ - pmap_tlb_update_addr(pmap, va, npte, resident); + kpreempt_enable(); return 0; } - kpreempt_disable(); pte = pmap_pte_reserve(pmap, va, flags); if (__predict_false(pte == NULL)) { kpreempt_enable(); @@ -1593,6 +1622,20 @@ kpreempt_enable(); } +void +pmap_remove_all(struct pmap *pmap) +{ + KASSERT(pmap != pmap_kernel()); + + kpreempt_disable(); + /* + * Free all of our ASIDs which means we can skip doing all the + * tlb_invalidate_addrs(). + */ + pmap_tlb_asid_deactivate(pmap); + pmap_tlb_asid_release_all(pmap); + kpreempt_enable(); +} /* * Routine: pmap_unwire * Function: Clear the wired attribute for a map/virtual-address @@ -1665,7 +1708,6 @@ if (pmapdebug & PDB_FOLLOW) printf("pmap_extract(%p, %#"PRIxVADDR") -> ", pmap, va); #endif - kpreempt_disable(); if (pmap == pmap_kernel()) { if (MIPS_KSEG0_P(va)) { pa = MIPS_KSEG0_TO_PHYS(va); @@ -1681,9 +1723,12 @@ if (MIPS_KSEG1_P(va)) panic("pmap_extract: kseg1 address %#"PRIxVADDR"", va); #endif - else - pte = kvtopte(va); + if (va >= mips_virtual_end) + panic("pmap_extract: illegal kernel mapped address %#"PRIxVADDR"", va); + pte = kvtopte(va); + kpreempt_disable(); } else { + kpreempt_disable(); if (!(pte = pmap_pte_lookup(pmap, va))) { #ifdef DEBUG if (pmapdebug & PDB_FOLLOW) @@ -1702,11 +1747,11 @@ return false; } pa = mips_tlbpfn_to_paddr(pte->pt_entry) | (va & PGOFSET); + kpreempt_enable(); done: if (pap != NULL) { *pap = pa; } - kpreempt_enable(); #ifdef DEBUG if (pmapdebug & PDB_FOLLOW) printf("pa %#"PRIxPADDR"\n", pa); @@ -1968,7 +2013,9 @@ pmap_enter_pv(pmap_t pmap, vaddr_t va, struct vm_page *pg, u_int *npte) { pv_entry_t pv, npv, apv; + int16_t gen; + KASSERT(kpreempt_disabled()); KASSERT(!MIPS_KSEG0_P(va)); KASSERT(!MIPS_KSEG1_P(va)); #ifdef _LP64 @@ -1982,8 +2029,7 @@ printf("pmap_enter: pv %p: was %#"PRIxVADDR"/%p/%p\n", pv, pv->pv_va, pv->pv_pmap, pv->pv_next); #endif - kpreempt_disable(); - (void)VM_PAGE_PVLIST_LOCK(pg, true); + gen = VM_PAGE_PVLIST_LOCK(pg, true); pmap_check_pvlist(pg); #if defined(MIPS3_NO_PV_UNCACHED) || defined(MULTIPROCESSOR) again: @@ -2093,7 +2139,6 @@ #endif PMAP_COUNT(remappings); VM_PAGE_PVLIST_UNLOCK(pg); - kpreempt_enable(); if (__predict_false(apv != NULL)) pmap_pv_free(apv); return; @@ -2111,7 +2156,6 @@ * so get the page generation. We allocate the PV, and * then reacquire the lock. 
*/ - uint16_t gen = VM_PAGE_PVLIST_GEN(pg); VM_PAGE_PVLIST_UNLOCK(pg); #endif apv = (pv_entry_t)pmap_pv_alloc(); @@ -2123,7 +2167,9 @@ * tinkered with this page so we should * start over. */ - if (gen != VM_PAGE_PVLIST_LOCK(pg, true)) + uint16_t oldgen = gen; + gen = VM_PAGE_PVLIST_LOCK(pg, true); + if (gen != oldgen) goto again; #endif } @@ -2137,7 +2183,6 @@ } pmap_check_pvlist(pg); VM_PAGE_PVLIST_UNLOCK(pg); - kpreempt_enable(); if (__predict_false(apv != NULL)) pmap_pv_free(apv); } @@ -2160,7 +2205,7 @@ printf("pmap_remove_pv(%p, %#"PRIxVADDR", %#"PRIxPADDR")\n", pmap, va, VM_PAGE_TO_PHYS(pg)); #endif - + KASSERT(kpreempt_disabled()); pv = &pg->mdpage.pvh_first; (void)VM_PAGE_PVLIST_LOCK(pg, true); @@ -2317,6 +2362,12 @@ */ return gen; } +#else +static void +pmap_pvlist_lock_init(void) +{ + mutex_init(&pmap_pvlist_mutex, MUTEX_DEFAULT, IPL_VM); +} #endif /* MULTIPROCESSOR */ /* Index: src/sys/arch/mips/mips/pmap_tlb.c diff -u src/sys/arch/mips/mips/pmap_tlb.c:1.1.2.9 src/sys/arch/mips/mips/pmap_tlb.c:1.1.2.10 --- src/sys/arch/mips/mips/pmap_tlb.c:1.1.2.9 Mon Mar 1 23:53:26 2010 +++ src/sys/arch/mips/mips/pmap_tlb.c Thu Mar 11 08:19:01 2010 @@ -1,4 +1,4 @@ -/* $NetBSD: pmap_tlb.c,v 1.1.2.9 2010/03/01 23:53:26 matt Exp $ */ +/* $NetBSD: pmap_tlb.c,v 1.1.2.10 2010/03/11 08:19:01 matt Exp $ */ /*- * Copyright (c) 2010 The NetBSD Foundation, Inc. @@ -31,7 +31,7 @@ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: pmap_tlb.c,v 1.1.2.9 2010/03/01 23:53:26 matt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: pmap_tlb.c,v 1.1.2.10 2010/03/11 08:19:01 matt Exp $"); /* * Manages address spaces in a TLB. @@ -144,6 +144,7 @@ static kmutex_t pmap_tlb0_mutex __aligned(32); struct pmap_tlb_info pmap_tlb0_info = { + .ti_name = "tlb0", .ti_asid_hint = 1, .ti_asid_mask = MIPS_TLB_NUM_PIDS - 1, .ti_asid_max = MIPS_TLB_NUM_PIDS - 1, @@ -159,10 +160,12 @@ }; #ifdef MULTIPROCESSOR -static struct pmap_tlb_info *pmap_tlbs[MAXCPUS] = { +struct pmap_tlb_info *pmap_tlbs[MAXCPUS] = { [0] = &pmap_tlb0_info, }; -static u_int pmap_ntlbs = 1; +u_int pmap_ntlbs = 1; +u_int pmap_tlb_synci_page_mask; +u_int pmap_tlb_synci_map_mask; #endif #define __BITMAP_SET(bm, n) \ ((bm)[(n) / (8*sizeof(bm[0]))] |= 1LU << ((n) % (8*sizeof(bm[0])))) @@ -211,6 +214,31 @@ #endif /* MULTIPROCESSOR */ } +static void +pmap_tlb_attach_evcnt(struct pmap_tlb_info *ti) +{ +#ifdef MULTIPROCESSOR + evcnt_attach_dynamic(&ti->ti_evcnt_synci_desired, + EVCNT_TYPE_MISC, NULL, + ti->ti_name, "icache syncs desired"); + evcnt_attach_dynamic(&ti->ti_evcnt_synci_asts, + EVCNT_TYPE_MISC, &ti->ti_evcnt_synci_desired, + ti->ti_name, "icache sync asts"); + evcnt_attach_dynamic(&ti->ti_evcnt_synci_all, + EVCNT_TYPE_MISC, &ti->ti_evcnt_synci_asts, + ti->ti_name, "iacche full syncs"); + evcnt_attach_dynamic(&ti->ti_evcnt_synci_pages, + EVCNT_TYPE_MISC, &ti->ti_evcnt_synci_asts, + ti->ti_name, "icache pages synced"); + evcnt_attach_dynamic(&ti->ti_evcnt_synci_duplicate, + EVCNT_TYPE_MISC, &ti->ti_evcnt_synci_desired, + ti->ti_name, "icache dup pages skipped"); + evcnt_attach_dynamic(&ti->ti_evcnt_synci_deferred, + EVCNT_TYPE_MISC, &ti->ti_evcnt_synci_desired, + ti->ti_name, "icache pages deferred"); +#endif /* MULTIPROCESSOR */ +} + void pmap_tlb_info_init(struct pmap_tlb_info *ti) { @@ -231,6 +259,17 @@ ti->ti_asid_mask |= ti->ti_asid_mask >> 1; } } +#ifdef MULTIPROCESSOR + const u_int icache_way_pages = + mips_cache_info.mci_picache_way_size >> PGSHIFT; + KASSERT(icache_way_pages <= 8*sizeof(pmap_tlb_synci_page_mask)); + pmap_tlb_synci_page_mask = icache_way_pages - 1; 
+ pmap_tlb_synci_map_mask = ~(~0 << icache_way_pages); + printf("tlb0: synci page mask %#x and map mask %#x used for %u pages\n", + pmap_tlb_synci_page_mask, pmap_tlb_synci_map_mask, + icache_way_pages); +#endif + pmap_tlb_attach_evcnt(ti); return; #ifdef MULTIPROCESSOR } @@ -247,6 +286,10 @@ ti->ti_victim = NULL; ti->ti_cpu_mask = 0; ti->ti_index = pmap_ntlbs++; + snprintf(ti->ti_name, sizeof(ti->ti_name), "tlb%u", ti->ti_index); + + pmap_tlb_attach_evcnt(ti); + /* * If we are reserving a tlb slot for mapping cpu_info, * allocate it now. @@ -299,6 +342,8 @@ static void pmap_tlb_asid_reinitialize(struct pmap_tlb_info *ti, enum tlb_invalidate_op op) { + const size_t asid_bitmap_words = + ti->ti_asid_max / (8 * sizeof(ti->ti_asid_bitmap[0])); /* * First, clear the ASID bitmap (except for ASID 0 which belongs * to the kernel). @@ -306,8 +351,8 @@ ti->ti_asids_free = ti->ti_asid_max; ti->ti_asid_hint = 1; ti->ti_asid_bitmap[0] = 1; - for (size_t i = 1; i < __arraycount(ti->ti_asid_bitmap); i++) - ti->ti_asid_bitmap[i] = 0; + for (size_t word = 1; word <= asid_bitmap_words; word++) + ti->ti_asid_bitmap[word] = 0; switch (op) { case TLBINV_ALL: @@ -332,11 +377,8 @@ if (__predict_false(asids_found >= ti->ti_asid_max / 2)) { tlb_invalidate_asids(1, ti->ti_asid_mask); ti->ti_asid_bitmap[0] = 1; - for (size_t i = 1; - i < __arraycount(ti->ti_asid_bitmap); - i++) { - ti->ti_asid_bitmap[i] = 0; - } + for (size_t word = 1; word <= asid_bitmap_words; word++) + ti->ti_asid_bitmap[word] = 0; } else { ti->ti_asids_free -= asids_found; } @@ -385,6 +427,9 @@ { struct cpu_info * const ci = curcpu(); struct pmap_tlb_info * const ti = ci->ci_tlb_info; +#ifdef DIAGNOSTIC + struct pmap * const curpmap = curlwp->l_proc->p_vmspace->vm_map.pmap; +#endif TLBINFO_LOCK(ti); @@ -409,7 +454,7 @@ * next called for this pmap, it will allocate a new * ASID. */ - KASSERT((curlwp->l_proc->p_vmspace->vm_map.pmap->pm_onproc & ti->ti_cpu_mask) == 0); + KASSERT((curpmap->pm_onproc & ti->ti_cpu_mask) == 0); pmap_pai_reset(ti, pai, PAI_PMAP(pai, ti)); } break; @@ -559,6 +604,8 @@ struct pmap_asid_info * const pai = PMAP_PAI(pm, ti); int rv = -1; + KASSERT(kpreempt_disabled()); + TLBINFO_LOCK(ti); if (pm == pmap_kernel() || PMAP_PAI_ASIDVALID_P(pai, ti)) { va |= pai->pai_asid << MIPS_TLB_PID_SHIFT; @@ -578,6 +625,8 @@ struct pmap_tlb_info * const ti = curcpu()->ci_tlb_info; struct pmap_asid_info * const pai = PMAP_PAI(pm, ti); + KASSERT(kpreempt_disabled()); + TLBINFO_LOCK(ti); if (pm == pmap_kernel() || PMAP_PAI_ASIDVALID_P(pai, ti)) { va |= pai->pai_asid << MIPS_TLB_PID_SHIFT; @@ -611,17 +660,14 @@ * a new one. */ if (__predict_false(TLBINFO_ASID_INUSE_P(ti, ti->ti_asid_hint))) { -#ifdef DIAGNOSTIC - const size_t words = __arraycount(ti->ti_asid_bitmap); -#endif const size_t nbpw = 8 * sizeof(ti->ti_asid_bitmap[0]); for (size_t i = 0; i < ti->ti_asid_hint / nbpw; i++) { KASSERT(~ti->ti_asid_bitmap[i] == 0); } for (size_t i = ti->ti_asid_hint / nbpw;; i++) { - KASSERT(i < words); + KASSERT(i <= ti->ti_asid_max / nbpw); /* - * ffs was to find the first bit set while we want the + * ffs wants to find the first bit set while we want * to find the first bit cleared. */ u_long bits = ~ti->ti_asid_bitmap[i]; @@ -674,6 +720,8 @@ struct pmap_tlb_info * const ti = ci->ci_tlb_info; struct pmap_asid_info * const pai = PMAP_PAI(pm, ti); + KASSERT(kpreempt_disabled()); + /* * Kernels use a fixed ASID of 0 and don't need to acquire one. */ @@ -703,7 +751,17 @@ * be changed while this TLBs lock is held. 
*/ atomic_or_32(&pm->pm_onproc, 1 << cpu_index(ci)); -#endif + /* + * If this CPU has had exec pages changes that haven't been + * icache synched, make sure to do that before returning to + * userland. + */ + if (ti->ti_synci_page_bitmap) { + l->l_md.md_astpending = 1; /* force call to ast() */ + ci->ci_evcnt_synci_activate_rqst.ev_count++; + } + atomic_or_ulong(&ci->ci_flags, CPUF_USERPMAP); +#endif /* MULTIPROCESSOR */ tlb_set_asid(pai->pai_asid); } TLBINFO_UNLOCK(ti); @@ -721,6 +779,7 @@ void pmap_tlb_asid_deactivate(pmap_t pm) { + KASSERT(kpreempt_disabled()); #ifdef MULTIPROCESSOR /* * The kernel pmap is aways onproc and active and must never have @@ -736,6 +795,7 @@ */ atomic_and_32(&pm->pm_onproc, ~(1 << cpu_index(ci))); TLBINFO_UNLOCK(ti); + atomic_and_ulong(&ci->ci_flags, ~CPUF_USERPMAP); } #endif } @@ -759,3 +819,157 @@ } #endif /* MULTIPROCESSOR */ } + +#ifdef MULTIPROCESSOR +void +pmap_tlb_syncicache_ast(struct cpu_info *ci) +{ + struct pmap_tlb_info * const ti = ci->ci_tlb_info; + + kpreempt_disable(); + uint32_t page_bitmap = atomic_swap_32(&ti->ti_synci_page_bitmap, 0); +#if 0 + printf("%s: need to sync %#x\n", __func__, page_bitmap); +#endif + ti->ti_evcnt_synci_asts.ev_count++; + /* + * If every bit is set in the bitmap, sync the entire icache. + */ + if (page_bitmap == pmap_tlb_synci_map_mask) { + mips_icache_sync_all(); + ti->ti_evcnt_synci_all.ev_count++; + ti->ti_evcnt_synci_pages.ev_count += pmap_tlb_synci_page_mask+1; + kpreempt_enable(); + return; + } + + /* + * Loop through the bitmap clearing each set of indices for each page. + */ + for (vaddr_t va = 0; + page_bitmap != 0; + page_bitmap >>= 1, va += PAGE_SIZE) { + if (page_bitmap & 1) { + /* + * Each bit set represents a page to be synced. + */ + mips_icache_sync_range_index(va, PAGE_SIZE); + ti->ti_evcnt_synci_pages.ev_count++; + } + } + + kpreempt_enable(); +} + +void +pmap_tlb_syncicache(vaddr_t va, uint32_t page_onproc) +{ + KASSERT(kpreempt_disabled()); + /* + * We don't sync the icache here but let ast do it for us just before + * returning to userspace. We do this because we don't really know + * on which CPU we will return to userspace and if we synch the icache + * now it might not be on the CPU we need it on. In addition, others + * threads might sync the icache before we get to return to userland + * so there's no reason for us to do it. + * + * Each TLB/cache keeps a synci sequence number which gets advanced + * each that TLB/cache performs a mips_sync_icache_all. When we + * return to userland, we check the pmap's corresponding synci + * sequence number for that TLB/cache. If they match, it means that + * no one has yet synched the icache so we much do it ourselves. If + * they don't match someone has already synced the icache for us. + * + * There is a small chance that the generation numbers will wrap and + * then become equal but that's a one in 4 billion cache and will + * just cause an extra sync of the icache. 
+ */ + const uint32_t cpu_mask = 1L << cpu_index(curcpu()); + const uint32_t page_mask = + 1L << ((va >> PGSHIFT) & pmap_tlb_synci_page_mask); + uint32_t onproc = 0; + for (size_t i = 0; i < pmap_ntlbs; i++) { + struct pmap_tlb_info * const ti = pmap_tlbs[0]; + TLBINFO_LOCK(ti); + for (;;) { + uint32_t old_page_bitmap = ti->ti_synci_page_bitmap; + if (old_page_bitmap & page_mask) { + ti->ti_evcnt_synci_duplicate.ev_count++; + break; + } + + uint32_t orig_page_bitmap = atomic_cas_32( + &ti->ti_synci_page_bitmap, old_page_bitmap, + old_page_bitmap | page_mask); + + if (orig_page_bitmap == old_page_bitmap) { + if (old_page_bitmap == 0) { + onproc |= ti->ti_cpu_mask; + } else { + ti->ti_evcnt_synci_deferred.ev_count++; + } + ti->ti_evcnt_synci_desired.ev_count++; + break; + } + } +#if 0 + printf("%s: %s: %x to %x on cpus %#x\n", __func__, + ti->ti_name, page_mask, ti->ti_synci_page_bitmap, + onproc & page_onproc & ti->ti_cpu_mask); +#endif + TLBINFO_UNLOCK(ti); + } + onproc &= page_onproc; + if (__predict_false(onproc != 0)) { + /* + * If the cpu need to sync this page, tell the current lwp + * to sync the icache before it returns to userspace. + */ + if (onproc & cpu_mask) { + if (curcpu()->ci_flags & CPUF_USERPMAP) { + curlwp->l_md.md_astpending = 1; /* force call to ast() */ + curcpu()->ci_evcnt_synci_onproc_rqst.ev_count++; + } else { + curcpu()->ci_evcnt_synci_deferred_rqst.ev_count++; + } + onproc ^= cpu_mask; + } + + /* + * For each cpu that is affect, send an IPI telling + * that CPU that the current thread needs to sync its icache. + * We might cause some spurious icache syncs but that's not + * going to break anything. + */ + for (u_int n = ffs(onproc); + onproc != 0; + onproc >>= n, onproc <<= n, n = ffs(onproc)) { + cpu_send_ipi(cpu_lookup(n-1), IPI_SYNCICACHE); + } + } +} + +void +pmap_tlb_syncicache_wanted(struct cpu_info *ci) +{ + struct pmap_tlb_info * const ti = ci->ci_tlb_info; + + KASSERT(cpu_intr_p()); + + TLBINFO_LOCK(ti); + + /* + * We might have been notified because another CPU changed an exec + * page and now needs us to sync the icache so tell the current lwp + * to do the next time it returns to userland (which should be very + * soon). + */ + if (ti->ti_synci_page_bitmap && (ci->ci_flags & CPUF_USERPMAP)) { + curlwp->l_md.md_astpending = 1; /* force call to ast() */ + ci->ci_evcnt_synci_ipi_rqst.ev_count++; + } + + TLBINFO_UNLOCK(ti); + +} +#endif /* MULTIPROCESSOR */ Index: src/sys/arch/mips/mips/trap.c diff -u src/sys/arch/mips/mips/trap.c:1.217.12.21 src/sys/arch/mips/mips/trap.c:1.217.12.22 --- src/sys/arch/mips/mips/trap.c:1.217.12.21 Sun Feb 28 23:45:06 2010 +++ src/sys/arch/mips/mips/trap.c Thu Mar 11 08:19:01 2010 @@ -1,4 +1,4 @@ -/* $NetBSD: trap.c,v 1.217.12.21 2010/02/28 23:45:06 matt Exp $ */ +/* $NetBSD: trap.c,v 1.217.12.22 2010/03/11 08:19:01 matt Exp $ */ /* * Copyright (c) 1992, 1993 @@ -78,7 +78,7 @@ #include <sys/cdefs.h> /* RCS ID & Copyright macro defns */ -__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.217.12.21 2010/02/28 23:45:06 matt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.217.12.22 2010/03/11 08:19:01 matt Exp $"); #include "opt_cputype.h" /* which mips CPU levels do we support? 
*/ #include "opt_ddb.h" @@ -312,7 +312,8 @@ panic("utlbmod: no pte"); pt_entry = pte->pt_entry; if (!mips_pg_v(pt_entry)) - panic("utlbmod: invalid pte"); + panic("utlbmod: va %"PRIxVADDR" invalid pte %08x @ %p", + vaddr, pt_entry, pte); if (pt_entry & mips_pg_ro_bit()) { /* write to read only page */ ftype = VM_PROT_WRITE; @@ -604,13 +605,20 @@ void ast(void) { - struct lwp *l = curlwp; - struct cpu_info *ci = l->l_cpu; + struct lwp * const l = curlwp; + struct cpu_info * const ci = l->l_cpu; + u_int astpending; - while (l->l_md.md_astpending) { + while ((astpending = l->l_md.md_astpending) != 0) { uvmexp.softs++; l->l_md.md_astpending = 0; +#ifdef MULTIPROCESSOR + if (ci->ci_tlb_info->ti_synci_page_bitmap != 0) + pmap_tlb_syncicache_ast(ci); + KASSERT(ci->ci_tlb_info->ti_synci_page_bitmap == 0); +#endif + if (l->l_pflag & LP_OWEUPC) { l->l_pflag &= ~LP_OWEUPC; ADDUPROF(l);