The diff below optimizes TLB flushes a bit: it flushes a whole range at
once instead of individual pages in pmap_remove(), and only does a flush
when we actually insert a mapping in pmap_enter().

This survived a make build on my machine, but some further testing
would be appreciated.
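
In short: pmap_remove_pted() itself no longer flushes; pmap_remove() does a
single ranged flush after its loop, and the other callers (pmap_enter(),
pmap_page_protect()) flush the page they just touched.  As a rough sketch of
the resulting pmap_remove() (simplified, not the literal code from the diff
below):

void
pmap_remove(pmap_t pm, vaddr_t sva, vaddr_t eva)
{
	struct pte_desc *pted;
	vaddr_t va;

	pmap_lock(pm);
	for (va = sva; va < eva; va += PAGE_SIZE) {
		pted = pmap_vp_lookup(pm, va, NULL);
		/* no per-page tlb_flush_page() in here anymore */
		if (pted != NULL && PTED_VALID(pted))
			pmap_remove_pted(pm, pted);
	}
	/* one flush covering the whole range */
	tlb_flush_range(pm, sva, eva);
	pmap_unlock(pm);
}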


Index: arch/riscv64/riscv64/pmap.c
===================================================================
RCS file: /cvs/src/sys/arch/riscv64/riscv64/pmap.c,v
retrieving revision 1.20
diff -u -p -r1.20 pmap.c
--- arch/riscv64/riscv64/pmap.c 14 Sep 2021 16:21:21 -0000      1.20
+++ arch/riscv64/riscv64/pmap.c 29 Sep 2021 22:03:58 -0000
@@ -65,6 +65,33 @@ do_tlb_flush_page(pmap_t pm, vaddr_t va)
 }
 
 void
+do_tlb_flush_range(pmap_t pm, vaddr_t sva, vaddr_t eva)
+{
+#ifdef MULTIPROCESSOR
+       CPU_INFO_ITERATOR cii;
+       struct cpu_info *ci;
+       unsigned long hart_mask = 0;
+
+       CPU_INFO_FOREACH(cii, ci) {
+               if (ci == curcpu())
+                       continue;
+               if (pmap_is_active(pm, ci))
+                       hart_mask |= (1UL << ci->ci_hartid);
+       }
+
+       if (hart_mask != 0)
+               sbi_remote_sfence_vma(&hart_mask, sva, eva - sva);
+#endif
+
+       if (pmap_is_active(pm, curcpu())) {
+               while (sva < eva) {
+                       sfence_vma_page(sva);
+                       sva += PAGE_SIZE;
+               }
+       }
+}
+
+void
 do_tlb_flush(pmap_t pm)
 {
 #ifdef MULTIPROCESSOR
@@ -95,6 +122,15 @@ tlb_flush_page(pmap_t pm, vaddr_t va)
                do_tlb_flush_page(pm, va);
 }
 
+void
+tlb_flush_range(pmap_t pm, vaddr_t sva, vaddr_t eva)
+{
+       if (cpu_errata_sifive_cip_1200)
+               do_tlb_flush(pm);
+       else
+               do_tlb_flush_range(pm, sva, eva);
+}
+
 static inline void
 icache_flush(void)
 {
@@ -480,6 +516,8 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
        int cache = PMAP_CACHE_WB;
        int need_sync;
 
+       KASSERT((va & PAGE_MASK) == 0);
+
        if (pa & PMAP_NOCACHE)
                cache = PMAP_CACHE_CI;
        if (pa & PMAP_DEVICE)
@@ -490,6 +528,7 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
        pted = pmap_vp_lookup(pm, va, NULL);
        if (pted && PTED_VALID(pted)) {
                pmap_remove_pted(pm, pted);
+               tlb_flush_page(pm, va);
                /* we lost our pted if it was user */
                if (pm != pmap_kernel())
                        pted = pmap_vp_lookup(pm, va, NULL);
@@ -540,10 +579,9 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
         */
        if (flags & (PROT_READ|PROT_WRITE|PROT_EXEC|PMAP_WIRED)) {
                pmap_pte_insert(pted);
+               tlb_flush_page(pm, va);
        }
 
-       tlb_flush_page(pm, va & ~PAGE_MASK);
-
        if (pg != NULL && (flags & PROT_EXEC)) {
                need_sync = ((pg->pg_flags & PG_PMAP_EXE) == 0);
                atomic_setbits_int(&pg->pg_flags, PG_PMAP_EXE);
@@ -582,6 +620,7 @@ pmap_remove(pmap_t pm, vaddr_t sva, vadd
                if (PTED_VALID(pted))
                        pmap_remove_pted(pm, pted);
        }
+       tlb_flush_range(pm, sva, eva);
        pmap_unlock(pm);
 }
 
@@ -600,8 +639,6 @@ pmap_remove_pted(pmap_t pm, struct pte_d
 
        pmap_pte_remove(pted, pm != pmap_kernel());
 
-       tlb_flush_page(pm, pted->pted_va & ~PAGE_MASK);
-
        if (pted->pted_va & PTED_VA_EXEC_M) {
                pted->pted_va &= ~PTED_VA_EXEC_M;
        }
@@ -699,7 +736,6 @@ pmap_kremove_pg(vaddr_t va)
         * or that the mapping is not present in the hash table.
         */
        pmap_pte_remove(pted, 0);
-
        tlb_flush_page(pm, pted->pted_va & ~PAGE_MASK);
 
        if (pted->pted_va & PTED_VA_EXEC_M)
@@ -1514,6 +1550,8 @@ pmap_page_protect(struct vm_page *pg, vm
                mtx_leave(&pg->mdpage.pv_mtx);
 
                pmap_remove_pted(pm, pted);
+               tlb_flush_page(pm, pted->pted_va & ~PAGE_MASK);
+
                pmap_unlock(pm);
                pmap_destroy(pm);
 