Author: jhb
Date: Mon Oct 15 18:56:54 2018
New Revision: 339367
URL: https://svnweb.freebsd.org/changeset/base/339367

Log:
  Various fixes for TLB management on RISC-V.
  
  - Remove the arm64-specific cpu_*cache* and cpu_tlb_flush* functions.
    Instead, add RISC-V specific inline functions in cpufunc.h for the
    fence.i and sfence.vma instructions.
  - Catch up to changes in the arm64 pmap and remove all the cpu_dcache_*
    calls, pmap_is_current, pmap_l3_valid_cacheable, and PTE_NEXT bits from
    pmap.
  - Remove references to the unimplemented riscv_setttb().
  - Remove unused cpu_nullop.
  - Add a link to the SBI doc to sbi.h.
  - Add support for a 4th argument in SBI calls.  It's not documented but
    it seems implied for the asid argument to SBI_REMOTE_SFENCE_VMA_ASID.
  - Pass the arguments from sbi_remote_sfence*() to the SEE.  BBL ignores
    them so this is just cosmetic.
  - Flush icaches on other CPUs when they resume from kdb in case the
    debugger wrote any breakpoints while the CPUs were paused in the IPI_STOP
    handler.
  - Add SMP vs UP versions of pmap_invalidate_* similar to amd64.  The
    UP versions just use simple fences.  The SMP versions use the
    sbi_remote_sfence*() functions to perform TLB shootdowns.  Since we
    don't have a valid pm_active field in the riscv pmap, just IPI all
    CPUs for all invalidations for now.
  - Remove an extraneous TLB flush from the end of pmap_bootstrap().
  - Don't do a TLB flush when writing new mappings in pmap_enter(), only if
    modifying an existing mapping.  Note that for COW faults a TLB flush is
    only performed after explicitly clearing the old mapping as is done in
    other pmaps.
  - Sync the i-cache on all harts before updating the PTE for executable
    mappings in pmap_enter and pmap_enter_quick.  Previously the i-cache was
    only sync'd after updating the PTE in pmap_enter.
  - Use sbi_remote_fence_i() instead of smp_rendezvous in pmap_sync_icache().
  
  Reviewed by:  markj
  Approved by:  re (gjb, kib)
  Sponsored by: DARPA
  Differential Revision:        https://reviews.freebsd.org/D17414

Modified:
  head/sys/cddl/dev/fbt/riscv/fbt_isa.c
  head/sys/riscv/include/cpufunc.h
  head/sys/riscv/include/kdb.h
  head/sys/riscv/include/sbi.h
  head/sys/riscv/riscv/cpufunc_asm.S
  head/sys/riscv/riscv/db_interface.c
  head/sys/riscv/riscv/mp_machdep.c
  head/sys/riscv/riscv/pmap.c

Modified: head/sys/cddl/dev/fbt/riscv/fbt_isa.c
==============================================================================
--- head/sys/cddl/dev/fbt/riscv/fbt_isa.c       Mon Oct 15 18:39:33 2018        
(r339366)
+++ head/sys/cddl/dev/fbt/riscv/fbt_isa.c       Mon Oct 15 18:56:54 2018        
(r339367)
@@ -78,11 +78,11 @@ fbt_patch_tracepoint(fbt_probe_t *fbt, fbt_patchval_t 
        switch(fbt->fbtp_patchval) {
        case FBT_C_PATCHVAL:
                *(uint16_t *)fbt->fbtp_patchpoint = (uint16_t)val;
-               cpu_icache_sync_range((vm_offset_t)fbt->fbtp_patchpoint, 2);
+               fence_i();
                break;
        case FBT_PATCHVAL:
                *fbt->fbtp_patchpoint = val;
-               cpu_icache_sync_range((vm_offset_t)fbt->fbtp_patchpoint, 4);
+               fence_i();
                break;
        };
 }

Modified: head/sys/riscv/include/cpufunc.h
==============================================================================
--- head/sys/riscv/include/cpufunc.h    Mon Oct 15 18:39:33 2018        
(r339366)
+++ head/sys/riscv/include/cpufunc.h    Mon Oct 15 18:56:54 2018        
(r339367)
@@ -81,29 +81,32 @@ intr_enable(void)
        );
 }
 
-#define        cpu_nullop()                    riscv_nullop()
-#define        cpufunc_nullop()                riscv_nullop()
-#define        cpu_setttb(a)                   riscv_setttb(a)
+/* NB: fence() is defined as a macro in <machine/atomic.h>. */
 
-#define        cpu_tlb_flushID()               riscv_tlb_flushID()
-#define        cpu_tlb_flushID_SE(e)           riscv_tlb_flushID_SE(e)
+static __inline void
+fence_i(void)
+{
 
-#define        cpu_dcache_wbinv_range(a, s)    riscv_dcache_wbinv_range((a), 
(s))
-#define        cpu_dcache_inv_range(a, s)      riscv_dcache_inv_range((a), (s))
-#define        cpu_dcache_wb_range(a, s)       riscv_dcache_wb_range((a), (s))
+       __asm __volatile("fence.i" ::: "memory");
+}
 
-#define        cpu_idcache_wbinv_range(a, s)   riscv_idcache_wbinv_range((a), 
(s))
-#define        cpu_icache_sync_range(a, s)     riscv_icache_sync_range((a), 
(s))
+static __inline void
+sfence_vma(void)
+{
 
+       __asm __volatile("sfence.vma" ::: "memory");
+}
+
+static __inline void
+sfence_vma_page(uintptr_t addr)
+{
+
+       __asm __volatile("sfence.vma %0" :: "r" (addr) : "memory");
+}
+
+#define        cpufunc_nullop()                riscv_nullop()
+
 void riscv_nullop(void);
-void riscv_setttb(vm_offset_t);
-void riscv_tlb_flushID(void);
-void riscv_tlb_flushID_SE(vm_offset_t);
-void riscv_icache_sync_range(vm_offset_t, vm_size_t);
-void riscv_idcache_wbinv_range(vm_offset_t, vm_size_t);
-void riscv_dcache_wbinv_range(vm_offset_t, vm_size_t);
-void riscv_dcache_inv_range(vm_offset_t, vm_size_t);
-void riscv_dcache_wb_range(vm_offset_t, vm_size_t);
 
 #endif /* _KERNEL */
 #endif /* _MACHINE_CPUFUNC_H_ */

Modified: head/sys/riscv/include/kdb.h
==============================================================================
--- head/sys/riscv/include/kdb.h        Mon Oct 15 18:39:33 2018        
(r339366)
+++ head/sys/riscv/include/kdb.h        Mon Oct 15 18:56:54 2018        
(r339367)
@@ -47,7 +47,11 @@ static __inline void
 kdb_cpu_sync_icache(unsigned char *addr, size_t size)
 {
 
-       cpu_icache_sync_range((vm_offset_t)addr, size);
+       /*
+        * Other CPUs flush their instruction cache when resuming from
+        * IPI_STOP.
+        */
+       fence_i();
 }
 
 static __inline void

Modified: head/sys/riscv/include/sbi.h
==============================================================================
--- head/sys/riscv/include/sbi.h        Mon Oct 15 18:39:33 2018        
(r339366)
+++ head/sys/riscv/include/sbi.h        Mon Oct 15 18:56:54 2018        
(r339367)
@@ -47,18 +47,25 @@
 #define        SBI_REMOTE_SFENCE_VMA_ASID      7
 #define        SBI_SHUTDOWN                    8
 
+/*
+ * Documentation available at
+ * https://github.com/riscv/riscv-sbi-doc/blob/master/riscv-sbi.md
+ */
+
 static __inline uint64_t
-sbi_call(uint64_t arg7, uint64_t arg0, uint64_t arg1, uint64_t arg2)
+sbi_call(uint64_t arg7, uint64_t arg0, uint64_t arg1, uint64_t arg2,
+    uint64_t arg3)
 {
-
        register uintptr_t a0 __asm ("a0") = (uintptr_t)(arg0);
        register uintptr_t a1 __asm ("a1") = (uintptr_t)(arg1);
        register uintptr_t a2 __asm ("a2") = (uintptr_t)(arg2);
+       register uintptr_t a3 __asm ("a3") = (uintptr_t)(arg3);
        register uintptr_t a7 __asm ("a7") = (uintptr_t)(arg7);
+
        __asm __volatile(                       \
                "ecall"                         \
                :"+r"(a0)                       \
-               :"r"(a1), "r"(a2), "r"(a7)      \
+               :"r"(a1), "r"(a2), "r" (a3), "r"(a7)    \
                :"memory");
 
        return (a0);
@@ -68,49 +75,49 @@ static __inline void
 sbi_console_putchar(int ch)
 {
 
-       sbi_call(SBI_CONSOLE_PUTCHAR, ch, 0, 0);
+       sbi_call(SBI_CONSOLE_PUTCHAR, ch, 0, 0, 0);
 }
 
 static __inline int
 sbi_console_getchar(void)
 {
 
-       return (sbi_call(SBI_CONSOLE_GETCHAR, 0, 0, 0));
+       return (sbi_call(SBI_CONSOLE_GETCHAR, 0, 0, 0, 0));
 }
 
 static __inline void
 sbi_set_timer(uint64_t val)
 {
 
-       sbi_call(SBI_SET_TIMER, val, 0, 0);
+       sbi_call(SBI_SET_TIMER, val, 0, 0, 0);
 }
 
 static __inline void
 sbi_shutdown(void)
 {
 
-       sbi_call(SBI_SHUTDOWN, 0, 0, 0);
+       sbi_call(SBI_SHUTDOWN, 0, 0, 0, 0);
 }
 
 static __inline void
 sbi_clear_ipi(void)
 {
 
-       sbi_call(SBI_CLEAR_IPI, 0, 0, 0);
+       sbi_call(SBI_CLEAR_IPI, 0, 0, 0, 0);
 }
 
 static __inline void
 sbi_send_ipi(const unsigned long *hart_mask)
 {
 
-       sbi_call(SBI_SEND_IPI, (uint64_t)hart_mask, 0, 0);
+       sbi_call(SBI_SEND_IPI, (uint64_t)hart_mask, 0, 0, 0);
 }
 
 static __inline void
 sbi_remote_fence_i(const unsigned long *hart_mask)
 {
 
-       sbi_call(SBI_REMOTE_FENCE_I, (uint64_t)hart_mask, 0, 0);
+       sbi_call(SBI_REMOTE_FENCE_I, (uint64_t)hart_mask, 0, 0, 0);
 }
 
 static __inline void
@@ -118,7 +125,7 @@ sbi_remote_sfence_vma(const unsigned long *hart_mask,
     unsigned long start, unsigned long size)
 {
 
-       sbi_call(SBI_REMOTE_SFENCE_VMA, (uint64_t)hart_mask, 0, 0);
+       sbi_call(SBI_REMOTE_SFENCE_VMA, (uint64_t)hart_mask, start, size, 0);
 }
 
 static __inline void
@@ -127,7 +134,8 @@ sbi_remote_sfence_vma_asid(const unsigned long *hart_m
     unsigned long asid)
 {
 
-       sbi_call(SBI_REMOTE_SFENCE_VMA_ASID, (uint64_t)hart_mask, 0, 0);
+       sbi_call(SBI_REMOTE_SFENCE_VMA_ASID, (uint64_t)hart_mask, start, size,
+           asid);
 }
 
 #endif /* !_MACHINE_SBI_H_ */

Modified: head/sys/riscv/riscv/cpufunc_asm.S
==============================================================================
--- head/sys/riscv/riscv/cpufunc_asm.S  Mon Oct 15 18:39:33 2018        
(r339366)
+++ head/sys/riscv/riscv/cpufunc_asm.S  Mon Oct 15 18:56:54 2018        
(r339367)
@@ -33,70 +33,11 @@
  */
 
 #include <machine/asm.h>
-#include <machine/param.h>
 __FBSDID("$FreeBSD$");
 
        .text
        .align  2
 
-.Lpage_mask:
-       .word   PAGE_MASK
-
 ENTRY(riscv_nullop)
        ret
 END(riscv_nullop)
-
-/*
- * Generic functions to read/modify/write the internal coprocessor registers
- */
-
-ENTRY(riscv_tlb_flushID)
-       sfence.vma
-       ret
-END(riscv_tlb_flushID)
-
-ENTRY(riscv_tlb_flushID_SE)
-       sfence.vma
-       ret
-END(riscv_tlb_flushID_SE)
-
-/*
- * void riscv_dcache_wb_range(vm_offset_t, vm_size_t)
- */
-ENTRY(riscv_dcache_wb_range)
-       sfence.vma
-       ret
-END(riscv_dcache_wb_range)
-
-/*
- * void riscv_dcache_wbinv_range(vm_offset_t, vm_size_t)
- */
-ENTRY(riscv_dcache_wbinv_range)
-       sfence.vma
-       ret
-END(riscv_dcache_wbinv_range)
-
-/*
- * void riscv_dcache_inv_range(vm_offset_t, vm_size_t)
- */
-ENTRY(riscv_dcache_inv_range)
-       sfence.vma
-       ret
-END(riscv_dcache_inv_range)
-
-/*
- * void riscv_idcache_wbinv_range(vm_offset_t, vm_size_t)
- */
-ENTRY(riscv_idcache_wbinv_range)
-       fence.i
-       sfence.vma
-       ret
-END(riscv_idcache_wbinv_range)
-
-/*
- * void riscv_icache_sync_range(vm_offset_t, vm_size_t)
- */
-ENTRY(riscv_icache_sync_range)
-       fence.i
-       ret
-END(riscv_icache_sync_range)

Modified: head/sys/riscv/riscv/db_interface.c
==============================================================================
--- head/sys/riscv/riscv/db_interface.c Mon Oct 15 18:39:33 2018        
(r339366)
+++ head/sys/riscv/riscv/db_interface.c Mon Oct 15 18:56:54 2018        
(r339367)
@@ -151,11 +151,8 @@ db_write_bytes(vm_offset_t addr, size_t size, char *da
                while (size-- > 0)
                        *dst++ = *data++;
 
-               fence();
-
-               /* Clean D-cache and invalidate I-cache */
-               cpu_dcache_wb_range(addr, (vm_size_t)size);
-               cpu_icache_sync_range(addr, (vm_size_t)size);
+               /* Invalidate I-cache */
+               fence_i();
        }
        (void)kdb_jmpbuf(prev_jb);
 

Modified: head/sys/riscv/riscv/mp_machdep.c
==============================================================================
--- head/sys/riscv/riscv/mp_machdep.c   Mon Oct 15 18:39:33 2018        
(r339366)
+++ head/sys/riscv/riscv/mp_machdep.c   Mon Oct 15 18:56:54 2018        
(r339367)
@@ -328,6 +328,12 @@ ipi_handler(void *arg)
                        CPU_CLR_ATOMIC(cpu, &started_cpus);
                        CPU_CLR_ATOMIC(cpu, &stopped_cpus);
                        CTR0(KTR_SMP, "IPI_STOP (restart)");
+
+                       /*
+                        * The kernel debugger might have set a breakpoint,
+                        * so flush the instruction cache.
+                        */
+                       fence_i();
                        break;
                case IPI_HARDCLOCK:
                        CTR1(KTR_SMP, "%s: IPI_HARDCLOCK", __func__);

Modified: head/sys/riscv/riscv/pmap.c
==============================================================================
--- head/sys/riscv/riscv/pmap.c Mon Oct 15 18:39:33 2018        (r339366)
+++ head/sys/riscv/riscv/pmap.c Mon Oct 15 18:56:54 2018        (r339367)
@@ -152,6 +152,7 @@ __FBSDID("$FreeBSD$");
 #include <machine/machdep.h>
 #include <machine/md_var.h>
 #include <machine/pcb.h>
+#include <machine/sbi.h>
 
 #define        NPDEPG          (PAGE_SIZE/(sizeof (pd_entry_t)))
 #define        NUPDE                   (NPDEPG * NPDEPG)
@@ -364,31 +365,12 @@ pmap_is_write(pt_entry_t entry)
 }
 
 static __inline int
-pmap_is_current(pmap_t pmap)
-{
-
-       return ((pmap == pmap_kernel()) ||
-           (pmap == curthread->td_proc->p_vmspace->vm_map.pmap));
-}
-
-static __inline int
 pmap_l3_valid(pt_entry_t l3)
 {
 
        return (l3 & PTE_V);
 }
 
-static __inline int
-pmap_l3_valid_cacheable(pt_entry_t l3)
-{
-
-       /* TODO */
-
-       return (0);
-}
-
-#define        PTE_SYNC(pte)   cpu_dcache_wb_range((vm_offset_t)pte, 
sizeof(*pte))
-
 static inline int
 pmap_page_accessed(pt_entry_t pte)
 {
@@ -514,14 +496,13 @@ pmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t mi
        dmap_phys_max = pa;
        dmap_max_addr = va;
 
-       cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE);
-       cpu_tlb_flushID();
+       sfence_vma();
 }
 
 static vm_offset_t
 pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start)
 {
-       vm_offset_t l2pt, l3pt;
+       vm_offset_t l3pt;
        pt_entry_t entry;
        pd_entry_t *l2;
        vm_paddr_t pa;
@@ -532,7 +513,6 @@ pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm
 
        l2 = pmap_l2(kernel_pmap, va);
        l2 = (pd_entry_t *)((uintptr_t)l2 & ~(PAGE_SIZE - 1));
-       l2pt = (vm_offset_t)l2;
        l2_slot = pmap_l2_index(va);
        l3pt = l3_start;
 
@@ -550,10 +530,7 @@ pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm
 
        /* Clean the L2 page table */
        memset((void *)l3_start, 0, l3pt - l3_start);
-       cpu_dcache_wb_range(l3_start, l3pt - l3_start);
 
-       cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE);
-
        return (l3pt);
 }
 
@@ -676,7 +653,7 @@ pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart,
        freemempos = pmap_bootstrap_l3(l1pt,
            VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos);
 
-       cpu_tlb_flushID();
+       sfence_vma();
 
 #define alloc_pages(var, np)                                           \
        (var) = freemempos;                                             \
@@ -732,8 +709,6 @@ pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart,
         * called something like "Maxphyspage".
         */
        Maxmem = atop(phys_avail[avail_slot - 1]);
-
-       cpu_tlb_flushID();
 }
 
 /*
@@ -769,43 +744,99 @@ pmap_init(void)
                rw_init(&pv_list_locks[i], "pmap pv list");
 }
 
+#ifdef SMP
 /*
- * Normal, non-SMP, invalidation functions.
- * We inline these within pmap.c for speed.
+ * For SMP, these functions have to use IPIs for coherence.
+ *
+ * In general, the calling thread uses a plain fence to order the
+ * writes to the page tables before invoking an SBI callback to invoke
+ * sfence_vma() on remote CPUs.
+ *
+ * Since the riscv pmap does not yet have a pm_active field, IPIs are
+ * sent to all CPUs in the system.
  */
-PMAP_INLINE void
+static void
 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 {
+       cpuset_t mask;
 
-       /* TODO */
-
        sched_pin();
-       __asm __volatile("sfence.vma %0" :: "r" (va) : "memory");
+       mask = all_cpus;
+       CPU_CLR(PCPU_GET(cpuid), &mask);
+       fence();
+       sbi_remote_sfence_vma(mask.__bits, va, 1);
+       sfence_vma_page(va);
        sched_unpin();
 }
 
-PMAP_INLINE void
+static void
 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
+       cpuset_t mask;
 
-       /* TODO */
-
        sched_pin();
-       __asm __volatile("sfence.vma");
+       mask = all_cpus;
+       CPU_CLR(PCPU_GET(cpuid), &mask);
+       fence();
+       sbi_remote_sfence_vma(mask.__bits, sva, eva - sva + 1);
+
+       /*
+        * Might consider a loop of sfence_vma_page() for a small
+        * number of pages in the future.
+        */
+       sfence_vma();
        sched_unpin();
 }
 
-PMAP_INLINE void
+static void
 pmap_invalidate_all(pmap_t pmap)
 {
+       cpuset_t mask;
 
-       /* TODO */
-
        sched_pin();
-       __asm __volatile("sfence.vma");
+       mask = all_cpus;
+       CPU_CLR(PCPU_GET(cpuid), &mask);
+       fence();
+
+       /*
+        * XXX: The SBI doc doesn't detail how to specify x0 as the
+        * address to perform a global fence.  BBL currently treats
+        * all sfence_vma requests as global however.
+        */
+       sbi_remote_sfence_vma(mask.__bits, 0, 0);
        sched_unpin();
 }
+#else
+/*
+ * Normal, non-SMP, invalidation functions.
+ * We inline these within pmap.c for speed.
+ */
+static __inline void
+pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
+{
 
+       sfence_vma_page(va);
+}
+
+static __inline void
+pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+
+       /*
+        * Might consider a loop of sfence_vma_page() for a small
+        * number of pages in the future.
+        */
+       sfence_vma();
+}
+
+static __inline void
+pmap_invalidate_all(pmap_t pmap)
+{
+
+       sfence_vma();
+}
+#endif
+
 /*
  *     Routine:        pmap_extract
  *     Function:
@@ -937,8 +968,6 @@ pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm
                entry |= (pn << PTE_PPN0_S);
                pmap_load_store(l3, entry);
 
-               PTE_SYNC(l3);
-
                va += PAGE_SIZE;
                pa += PAGE_SIZE;
                size -= PAGE_SIZE;
@@ -958,11 +987,9 @@ pmap_kremove(vm_offset_t va)
        l3 = pmap_l3(kernel_pmap, va);
        KASSERT(l3 != NULL, ("pmap_kremove: Invalid address"));
 
-       if (pmap_l3_valid_cacheable(pmap_load(l3)))
-               cpu_dcache_wb_range(va, L3_SIZE);
        pmap_load_clear(l3);
-       PTE_SYNC(l3);
-       pmap_invalidate_page(kernel_pmap, va);
+
+       sfence_vma();
 }
 
 void
@@ -981,11 +1008,11 @@ pmap_kremove_device(vm_offset_t sva, vm_size_t size)
                l3 = pmap_l3(kernel_pmap, va);
                KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va));
                pmap_load_clear(l3);
-               PTE_SYNC(l3);
 
                va += PAGE_SIZE;
                size -= PAGE_SIZE;
        }
+
        pmap_invalidate_range(kernel_pmap, sva, va);
 }
 
@@ -1039,7 +1066,6 @@ pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
                entry |= (pn << PTE_PPN0_S);
                pmap_load_store(l3, entry);
 
-               PTE_SYNC(l3);
                va += L3_SIZE;
        }
        pmap_invalidate_range(kernel_pmap, sva, va);
@@ -1063,10 +1089,7 @@ pmap_qremove(vm_offset_t sva, int count)
                l3 = pmap_l3(kernel_pmap, va);
                KASSERT(l3 != NULL, ("pmap_kremove: Invalid address"));
 
-               if (pmap_l3_valid_cacheable(pmap_load(l3)))
-                       cpu_dcache_wb_range(va, L3_SIZE);
                pmap_load_clear(l3);
-               PTE_SYNC(l3);
 
                va += PAGE_SIZE;
        }
@@ -1127,13 +1150,11 @@ _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t
                l1 = pmap_l1(pmap, va);
                pmap_load_clear(l1);
                pmap_distribute_l1(pmap, pmap_l1_index(va), 0);
-               PTE_SYNC(l1);
        } else {
                /* PTE page */
                pd_entry_t *l2;
                l2 = pmap_l2(pmap, va);
                pmap_load_clear(l2);
-               PTE_SYNC(l2);
        }
        pmap_resident_count_dec(pmap, 1);
        if (m->pindex < NUPDE) {
@@ -1279,9 +1300,6 @@ _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, str
                entry |= (pn << PTE_PPN0_S);
                pmap_load_store(l1, entry);
                pmap_distribute_l1(pmap, l1index, entry);
-
-               PTE_SYNC(l1);
-
        } else {
                vm_pindex_t l1index;
                pd_entry_t *l1, *l2;
@@ -1310,8 +1328,6 @@ _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, str
                entry = (PTE_V);
                entry |= (pn << PTE_PPN0_S);
                pmap_load_store(l2, entry);
-
-               PTE_SYNC(l2);
        }
 
        pmap_resident_count_inc(pmap, 1);
@@ -1445,8 +1461,6 @@ pmap_growkernel(vm_offset_t addr)
                        pmap_load_store(l1, entry);
                        pmap_distribute_l1(kernel_pmap,
                            pmap_l1_index(kernel_vm_end), entry);
-
-                       PTE_SYNC(l1);
                        continue; /* try again */
                }
                l2 = pmap_l1_to_l2(l1, kernel_vm_end);
@@ -1474,7 +1488,6 @@ pmap_growkernel(vm_offset_t addr)
                entry |= (pn << PTE_PPN0_S);
                pmap_load_store(l2, entry);
 
-               PTE_SYNC(l2);
                pmap_invalidate_page(kernel_pmap, kernel_vm_end);
 
                kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
@@ -1754,10 +1767,7 @@ pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_
        vm_page_t m;
 
        PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-       if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(pmap_load(l3)))
-               cpu_dcache_wb_range(va, L3_SIZE);
        old_l3 = pmap_load_clear(l3);
-       PTE_SYNC(l3);
        pmap_invalidate_page(pmap, va);
        if (old_l3 & PTE_SW_WIRED)
                pmap->pm_stats.wired_count -= 1;
@@ -1913,11 +1923,7 @@ pmap_remove_all(vm_page_t m)
                    "a block in %p's pv list", m));
 
                l3 = pmap_l2_to_l3(l2, pv->pv_va);
-               if (pmap_is_current(pmap) &&
-                   pmap_l3_valid_cacheable(pmap_load(l3)))
-                       cpu_dcache_wb_range(pv->pv_va, L3_SIZE);
                tl3 = pmap_load_clear(l3);
-               PTE_SYNC(l3);
                pmap_invalidate_page(pmap, pv->pv_va);
                if (tl3 & PTE_SW_WIRED)
                        pmap->pm_stats.wired_count--;
@@ -1947,7 +1953,7 @@ pmap_remove_all(vm_page_t m)
 void
 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
 {
-       vm_offset_t va, va_next;
+       vm_offset_t va_next;
        pd_entry_t *l1, *l2;
        pt_entry_t *l3p, l3;
        pt_entry_t entry;
@@ -1986,7 +1992,6 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t
                if (va_next > eva)
                        va_next = eva;
 
-               va = va_next;
                for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++,
                    sva += L3_SIZE) {
                        l3 = pmap_load(l3p);
@@ -1994,7 +1999,6 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t
                                entry = pmap_load(l3p);
                                entry &= ~(PTE_W);
                                pmap_load_store(l3p, entry);
-                               PTE_SYNC(l3p);
                                /* XXX: Use pmap_invalidate_range */
                                pmap_invalidate_page(pmap, sva);
                        }
@@ -2092,8 +2096,6 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, v
                                entry |= (l2_pn << PTE_PPN0_S);
                                pmap_load_store(l1, entry);
                                pmap_distribute_l1(pmap, pmap_l1_index(va), 
entry);
-                               PTE_SYNC(l1);
-
                                l2 = pmap_l1_to_l2(l1, va);
                        }
 
@@ -2112,7 +2114,6 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, v
                        entry = (PTE_V);
                        entry |= (l3_pn << PTE_PPN0_S);
                        pmap_load_store(l2, entry);
-                       PTE_SYNC(l2);
                        l3 = pmap_l2_to_l3(l2, va);
                }
                pmap_invalidate_page(pmap, va);
@@ -2163,10 +2164,6 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, v
                        goto validate;
                }
 
-               /* Flush the cache, there might be uncommitted data in it */
-               if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(orig_l3))
-                       cpu_dcache_wb_range(va, L3_SIZE);
-
                /*
                 * The physical page has changed.  Temporarily invalidate
                 * the mapping.  This ensures that all threads sharing the
@@ -2225,13 +2222,20 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, v
                        vm_page_aflag_set(m, PGA_WRITEABLE);
        }
 
+validate:
        /*
+        * Sync the i-cache on all harts before updating the PTE
+        * if the new PTE is executable.
+        */
+       if (prot & VM_PROT_EXECUTE)
+               pmap_sync_icache(pmap, va, PAGE_SIZE);
+
+       /*
         * Update the L3 entry.
         */
        if (orig_l3 != 0) {
-validate:
                orig_l3 = pmap_load_store(l3, new_l3);
-               PTE_SYNC(l3);
+               pmap_invalidate_page(pmap, va);
                KASSERT(PTE_TO_PHYS(orig_l3) == pa,
                    ("pmap_enter: invalid update"));
                if (pmap_page_dirty(orig_l3) &&
@@ -2239,11 +2243,7 @@ validate:
                        vm_page_dirty(m);
        } else {
                pmap_load_store(l3, new_l3);
-               PTE_SYNC(l3);
        }
-       pmap_invalidate_page(pmap, va);
-       if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap))
-           cpu_icache_sync_range(va, PAGE_SIZE);
 
        if (lock != NULL)
                rw_wunlock(lock);
@@ -2423,9 +2423,16 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, v
         */
        if ((m->oflags & VPO_UNMANAGED) == 0)
                entry |= PTE_SW_MANAGED;
+
+       /*
+        * Sync the i-cache on all harts before updating the PTE
+        * if the new PTE is executable.
+        */
+       if (prot & VM_PROT_EXECUTE)
+               pmap_sync_icache(pmap, va, PAGE_SIZE);
+
        pmap_load_store(l3, entry);
 
-       PTE_SYNC(l3);
        pmap_invalidate_page(pmap, va);
        return (mpte);
 }
@@ -2766,11 +2773,7 @@ pmap_remove_pages(pmap_t pmap)
                                    ("pmap_remove_pages: bad l3 %#jx",
                                    (uintmax_t)tl3));
 
-                               if (pmap_is_current(pmap) &&
-                                   pmap_l3_valid_cacheable(pmap_load(l3)))
-                                       cpu_dcache_wb_range(pv->pv_va, L3_SIZE);
                                pmap_load_clear(l3);
-                               PTE_SYNC(l3);
                                pmap_invalidate_page(pmap, pv->pv_va);
 
                                /*
@@ -3244,16 +3247,10 @@ pmap_activate(struct thread *td)
        critical_exit();
 }
 
-static void
-pmap_sync_icache_one(void *arg __unused)
-{
-
-       __asm __volatile("fence.i");
-}
-
 void
 pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
 {
+       cpuset_t mask;
 
        /*
         * From the RISC-V User-Level ISA V2.2:
@@ -3263,8 +3260,12 @@ pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t 
         * before requesting that all remote RISC-V harts execute a
         * FENCE.I."
         */
-       __asm __volatile("fence");
-       smp_rendezvous(NULL, pmap_sync_icache_one, NULL, NULL);
+       sched_pin();
+       mask = all_cpus;
+       CPU_CLR(PCPU_GET(cpuid), &mask);
+       fence();
+       sbi_remote_fence_i(mask.__bits);
+       sched_unpin();
 }
 
 /*
_______________________________________________
[email protected] mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "[email protected]"

Reply via email to