The diff below optimizes TLB flushes a bit: pmap_remove() now flushes the whole range at once instead of flushing individual pages as it tears them down, and pmap_enter() only flushes when it actually inserts (or replaces) a mapping.
This survived a make build on my machine, but some further testing would be appreciated.

Index: arch/riscv64/riscv64/pmap.c
===================================================================
RCS file: /cvs/src/sys/arch/riscv64/riscv64/pmap.c,v
retrieving revision 1.20
diff -u -p -r1.20 pmap.c
--- arch/riscv64/riscv64/pmap.c	14 Sep 2021 16:21:21 -0000	1.20
+++ arch/riscv64/riscv64/pmap.c	29 Sep 2021 22:03:58 -0000
@@ -65,6 +65,33 @@ do_tlb_flush_page(pmap_t pm, vaddr_t va)
 }
 
 void
+do_tlb_flush_range(pmap_t pm, vaddr_t sva, vaddr_t eva)
+{
+#ifdef MULTIPROCESSOR
+	CPU_INFO_ITERATOR cii;
+	struct cpu_info *ci;
+	unsigned long hart_mask = 0;
+
+	CPU_INFO_FOREACH(cii, ci) {
+		if (ci == curcpu())
+			continue;
+		if (pmap_is_active(pm, ci))
+			hart_mask |= (1UL << ci->ci_hartid);
+	}
+
+	if (hart_mask != 0)
+		sbi_remote_sfence_vma(&hart_mask, sva, eva - sva);
+#endif
+
+	if (pmap_is_active(pm, curcpu())) {
+		while (sva < eva) {
+			sfence_vma_page(sva);
+			sva += PAGE_SIZE;
+		}
+	}
+}
+
+void
 do_tlb_flush(pmap_t pm)
 {
 #ifdef MULTIPROCESSOR
@@ -95,6 +122,15 @@ tlb_flush_page(pmap_t pm, vaddr_t va)
 		do_tlb_flush_page(pm, va);
 }
 
+void
+tlb_flush_range(pmap_t pm, vaddr_t sva, vaddr_t eva)
+{
+	if (cpu_errata_sifive_cip_1200)
+		do_tlb_flush(pm);
+	else
+		do_tlb_flush_range(pm, sva, eva);
+}
+
 static inline void
 icache_flush(void)
 {
@@ -480,6 +516,8 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
 	int cache = PMAP_CACHE_WB;
 	int need_sync;
 
+	KASSERT((va & PAGE_MASK) == 0);
+
 	if (pa & PMAP_NOCACHE)
 		cache = PMAP_CACHE_CI;
 	if (pa & PMAP_DEVICE)
@@ -490,6 +528,7 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
 	pted = pmap_vp_lookup(pm, va, NULL);
 	if (pted && PTED_VALID(pted)) {
 		pmap_remove_pted(pm, pted);
+		tlb_flush_page(pm, va);
 		/* we lost our pted if it was user */
 		if (pm != pmap_kernel())
 			pted = pmap_vp_lookup(pm, va, NULL);
@@ -540,10 +579,9 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
 	 */
 	if (flags & (PROT_READ|PROT_WRITE|PROT_EXEC|PMAP_WIRED)) {
 		pmap_pte_insert(pted);
+		tlb_flush_page(pm, va);
 	}
 
-	tlb_flush_page(pm, va & ~PAGE_MASK);
-
 	if (pg != NULL && (flags & PROT_EXEC)) {
 		need_sync = ((pg->pg_flags & PG_PMAP_EXE) == 0);
 		atomic_setbits_int(&pg->pg_flags, PG_PMAP_EXE);
@@ -582,6 +620,7 @@ pmap_remove(pmap_t pm, vaddr_t sva, vadd
 		if (PTED_VALID(pted))
 			pmap_remove_pted(pm, pted);
 	}
+	tlb_flush_range(pm, sva, eva);
 	pmap_unlock(pm);
 }
 
@@ -600,8 +639,6 @@ pmap_remove_pted(pmap_t pm, struct pte_d
 
 	pmap_pte_remove(pted, pm != pmap_kernel());
 
-	tlb_flush_page(pm, pted->pted_va & ~PAGE_MASK);
-
 	if (pted->pted_va & PTED_VA_EXEC_M) {
 		pted->pted_va &= ~PTED_VA_EXEC_M;
 	}
@@ -699,7 +736,6 @@ pmap_kremove_pg(vaddr_t va)
 	 * or that the mapping is not present in the hash table.
 	 */
 	pmap_pte_remove(pted, 0);
-	tlb_flush_page(pm, pted->pted_va & ~PAGE_MASK);
 
 	if (pted->pted_va & PTED_VA_EXEC_M)
 		pted->pted_va &= ~PTED_VA_EXEC_M;
@@ -1514,6 +1550,8 @@ pmap_page_protect(struct vm_page *pg, vm
 			mtx_leave(&pg->mdpage.pv_mtx);
 
 			pmap_remove_pted(pm, pted);
+			tlb_flush_page(pm, pted->pted_va & ~PAGE_MASK);
+
 			pmap_unlock(pm);
 			pmap_destroy(pm);
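To spell out the net effect for reviewers: tearing down N pages used to cost N page flushes, each one a remote SBI call per active hart plus a local sfence.vma; with the diff, pmap_remove() removes all the PTEs first and flushes once at the end. A rough before/after sketch (not literal code from the tree; the old flush actually lived inside pmap_remove_pted(), and locking, hart_mask construction and the errata fallback are elided):

	/* Before: effectively one remote fence per page torn down. */
	for (va = sva; va < eva; va += PAGE_SIZE)
		tlb_flush_page(pm, va);

	/*
	 * After: a single ranged flush once all PTEs are gone.  The
	 * local hart still fences page by page via sfence_vma_page()
	 * inside do_tlb_flush_range(), but remote harts now get one
	 * sbi_remote_sfence_vma(&hart_mask, sva, eva - sva) call
	 * instead of one call per page, and the SBI firmware decides
	 * how to fence that range.
	 */
	tlb_flush_range(pm, sva, eva);

The cpu_errata_sifive_cip_1200 check in the new tlb_flush_range() mirrors the one already in tlb_flush_page(): on cores with that erratum, address-ranged flushes are avoided and the whole TLB is flushed instead.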