The diff below optimizes TLB flushes a bit: flush a whole range
instead of individual pages in pmap_remove(), and only flush when we
actually insert a mapping in pmap_enter().
This survived a make build on my machine, but some further testing
would be appreciated.
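For anyone reviewing without cpufunc.h open: sfence_vma_page() used
below (and its full-flush sibling sfence_vma()) are thin wrappers
around the RISC-V sfence.vma instruction, roughly like this (a sketch
from memory, not the verbatim definitions):

static inline void
sfence_vma(void)
{
	/* Invalidate all TLB entries on the local hart. */
	__asm volatile ("sfence.vma" ::: "memory");
}

static inline void
sfence_vma_page(vaddr_t va)
{
	/* Invalidate TLB entries for the page containing va only. */
	__asm volatile ("sfence.vma %0" :: "r" (va) : "memory");
}

The win in do_tlb_flush_range() is therefore not on the local hart,
which still issues one sfence.vma per page, but on the remote harts:
one sbi_remote_sfence_vma() ecall now covers the whole range instead
of one ecall per removed page.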
Index: arch/riscv64/riscv64/pmap.c
===================================================================
RCS file: /cvs/src/sys/arch/riscv64/riscv64/pmap.c,v
retrieving revision 1.20
diff -u -p -r1.20 pmap.c
--- arch/riscv64/riscv64/pmap.c 14 Sep 2021 16:21:21 -0000 1.20
+++ arch/riscv64/riscv64/pmap.c 29 Sep 2021 22:03:58 -0000
@@ -65,6 +65,33 @@ do_tlb_flush_page(pmap_t pm, vaddr_t va)
}

void
+do_tlb_flush_range(pmap_t pm, vaddr_t sva, vaddr_t eva)
+{
+#ifdef MULTIPROCESSOR
+ CPU_INFO_ITERATOR cii;
+ struct cpu_info *ci;
+ unsigned long hart_mask = 0;
+
+ CPU_INFO_FOREACH(cii, ci) {
+ if (ci == curcpu())
+ continue;
+ if (pmap_is_active(pm, ci))
+ hart_mask |= (1UL << ci->ci_hartid);
+ }
+
+ if (hart_mask != 0)
+ sbi_remote_sfence_vma(&hart_mask, sva, eva - sva);
+#endif
+
+ if (pmap_is_active(pm, curcpu())) {
+ while (sva < eva) {
+ sfence_vma_page(sva);
+ sva += PAGE_SIZE;
+ }
+ }
+}
+
+void
do_tlb_flush(pmap_t pm)
{
#ifdef MULTIPROCESSOR
@@ -95,6 +122,15 @@ tlb_flush_page(pmap_t pm, vaddr_t va)
do_tlb_flush_page(pm, va);
}

+void
+tlb_flush_range(pmap_t pm, vaddr_t sva, vaddr_t eva)
+{
+ if (cpu_errata_sifive_cip_1200)
+ do_tlb_flush(pm);
+ else
+ do_tlb_flush_range(pm, sva, eva);
+}
+
static inline void
icache_flush(void)
{
@@ -480,6 +516,8 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
int cache = PMAP_CACHE_WB;
int need_sync;

+ KASSERT((va & PAGE_MASK) == 0);
+
if (pa & PMAP_NOCACHE)
cache = PMAP_CACHE_CI;
if (pa & PMAP_DEVICE)
@@ -490,6 +528,7 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
pted = pmap_vp_lookup(pm, va, NULL);
if (pted && PTED_VALID(pted)) {
pmap_remove_pted(pm, pted);
+ tlb_flush_page(pm, va);
/* we lost our pted if it was user */
if (pm != pmap_kernel())
pted = pmap_vp_lookup(pm, va, NULL);
@@ -540,10 +579,9 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
*/
if (flags & (PROT_READ|PROT_WRITE|PROT_EXEC|PMAP_WIRED)) {
pmap_pte_insert(pted);
+ tlb_flush_page(pm, va);
}

- tlb_flush_page(pm, va & ~PAGE_MASK);
-
if (pg != NULL && (flags & PROT_EXEC)) {
need_sync = ((pg->pg_flags & PG_PMAP_EXE) == 0);
atomic_setbits_int(&pg->pg_flags, PG_PMAP_EXE);
@@ -582,6 +620,7 @@ pmap_remove(pmap_t pm, vaddr_t sva, vadd
if (PTED_VALID(pted))
pmap_remove_pted(pm, pted);
}

+ tlb_flush_range(pm, sva, eva);
pmap_unlock(pm);
}
@@ -600,8 +639,6 @@ pmap_remove_pted(pmap_t pm, struct pte_d
pmap_pte_remove(pted, pm != pmap_kernel());

- tlb_flush_page(pm, pted->pted_va & ~PAGE_MASK);
-
if (pted->pted_va & PTED_VA_EXEC_M) {
pted->pted_va &= ~PTED_VA_EXEC_M;
}
@@ -699,7 +736,6 @@ pmap_kremove_pg(vaddr_t va)
* or that the mapping is not present in the hash table.
*/
pmap_pte_remove(pted, 0);
-
tlb_flush_page(pm, pted->pted_va & ~PAGE_MASK);

if (pted->pted_va & PTED_VA_EXEC_M)
@@ -1514,6 +1550,8 @@ pmap_page_protect(struct vm_page *pg, vm
mtx_leave(&pg->mdpage.pv_mtx);

pmap_remove_pted(pm, pted);
+ tlb_flush_page(pm, pted->pted_va & ~PAGE_MASK);
+
pmap_unlock(pm);
pmap_destroy(pm);
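In case it helps with the testing: a dumb userland loop along these
lines should exercise pmap_enter(), pmap_remove() and the new flush
paths (just a sketch; the sizes and iteration count are arbitrary):

#include <sys/mman.h>
#include <err.h>
#include <string.h>

int
main(void)
{
	size_t len = 64 * 4096;		/* 64 pages per iteration */
	int i;

	for (i = 0; i < 100000; i++) {
		char *p = mmap(NULL, len, PROT_READ|PROT_WRITE,
		    MAP_ANON|MAP_PRIVATE, -1, 0);
		if (p == MAP_FAILED)
			err(1, "mmap");
		memset(p, 0xa5, len);	/* fault in every page */
		if (munmap(p, len) == -1)
			err(1, "munmap");
	}
	return 0;
}

Running a few of these in parallel should also hit the remote sfence
path on MP machines.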