While reading the arm64 code, the ranges covered by the TLB flushes in
the code that sets the L1/L2/L3 tables made no sense to me.  I came to
the conclusion that these flushes aren't necessary: when we enter the
final page table entry, we flush the TLB again, which flushes any
cached information for the L1/L2/L3 tables anyway.  This means the
code that flushes ranges can go, and arm64_tlbi_asid() can be folded
into the ttlb_flush() function.

One additional fix there: my previous changes made the asid == -1
branch unreachable.  I think that means that page table entries marked
as global are actually flushed even if the flush targets a specific
ASID, but the architecture manual isn't very explicit about this.  So
I reactivated the branch by changing the test to pm == pmap_kernel().
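
For reference, this is roughly what the folded-together helper looks
like after the diff (just a sketch; the comments are my reading of the
TLBI VA operand encoding, VA[55:12] in bits 43:0 and the ASID in bits
63:48, and they assume kernel mappings are entered as global):

static inline void
ttlb_flush(pmap_t pm, vaddr_t va)
{
	vaddr_t resva;

	/* page number of the VA, as the TLBI instructions expect it */
	resva = ((va >> PAGE_SHIFT) & ((1ULL << 44) - 1));

	if (pm == pmap_kernel()) {
		/* kernel mappings are global, so flush across all ASIDs */
		cpu_tlb_flush_all_asid(resva);
	} else {
		/* user mappings: restrict the flush to this pmap's ASID */
		resva |= (uint64_t)pm->pm_asid << 48;
		cpu_tlb_flush_asid(resva);
	}
}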
In the end this doesn't change performance in any measurable way, but
less code is always better, isn't it?
ok?
Index: arch/arm64/arm64/pmap.c
===================================================================
RCS file: /cvs/src/sys/arch/arm64/arm64/pmap.c,v
retrieving revision 1.32
diff -u -p -r1.32 pmap.c
--- arch/arm64/arm64/pmap.c 13 Apr 2017 20:48:29 -0000 1.32
+++ arch/arm64/arm64/pmap.c 14 Apr 2017 09:27:22 -0000
@@ -35,7 +35,6 @@
#include <ddb/db_output.h>
void pmap_setttb(struct proc *p);
-void arm64_tlbi_asid(vaddr_t va, int asid);
void pmap_free_asid(pmap_t pm);
/*
@@ -68,34 +67,13 @@ pmap_pa_is_mem(uint64_t pa)
static inline void
ttlb_flush(pmap_t pm, vaddr_t va)
{
- arm64_tlbi_asid(va, pm->pm_asid);
-}
-
-static inline void
-ttlb_flush_range(pmap_t pm, vaddr_t va, vsize_t size)
-{
- vaddr_t eva = va + size;
-
- /* if size is over 512 pages, just flush the entire cache !?!?! */
- if (size >= (512 * PAGE_SIZE)) {
- cpu_tlb_flush();
- return;
- }
-
- for ( ; va < eva; va += PAGE_SIZE)
- arm64_tlbi_asid(va, pm->pm_asid);
-}
-
-void
-arm64_tlbi_asid(vaddr_t va, int asid)
-{
vaddr_t resva;
resva = ((va >> PAGE_SHIFT) & ((1ULL << 44) - 1));
- if (asid == -1) {
+ if (pm == pmap_kernel()) {
cpu_tlb_flush_all_asid(resva);
} else {
- resva |= (unsigned long long)asid << 48;
+ resva |= (uint64_t)pm->pm_asid << 48;
cpu_tlb_flush_asid(resva);
}
}
@@ -1267,8 +1245,6 @@ pmap_set_l1(struct pmap *pm, uint64_t va
idx0 = VP_IDX0(va);
pm->pm_vp.l0->vp[idx0] = l1_va;
pm->pm_vp.l0->l0[idx0] = pg_entry;
-
- ttlb_flush_range(pm, va & ~PAGE_MASK, 1<<VP_IDX1_POS);
}
void
@@ -1299,8 +1275,6 @@ pmap_set_l2(struct pmap *pm, uint64_t va
vp1 = pm->pm_vp.l1;
vp1->vp[idx1] = l2_va;
vp1->l1[idx1] = pg_entry;
-
- ttlb_flush_range(pm, va & ~PAGE_MASK, 1<<VP_IDX2_POS);
}
void
@@ -1334,8 +1308,6 @@ pmap_set_l3(struct pmap *pm, uint64_t va
vp2 = vp1->vp[idx1];
vp2->vp[idx2] = l3_va;
vp2->l2[idx2] = pg_entry;
-
- ttlb_flush_range(pm, va & ~PAGE_MASK, 1<<VP_IDX3_POS);
}
/*
@@ -1578,7 +1550,7 @@ pmap_pte_remove(struct pte_desc *pted, i
if (remove_pted)
vp3->vp[VP_IDX3(pted->pted_va)] = NULL;
- arm64_tlbi_asid(pted->pted_va, pm->pm_asid);
+ ttlb_flush(pm, pted->pted_va);
}
/*