On Fri, Apr 14, 2017 at 11:51:52AM +0200, Mark Kettenis wrote:
> While reading the arm64 code, the ranges covered by the tlb flushes
> in the code that sets the l1/l2/l3 tables made no sense to me. I came
> to the conclusion that these flushes aren't necessary: when we enter
> the final page table entry, we flush the tlb again, which flushes any
> cached information for the l1/l2/l3 tables anyway. This means the
> code that flushes ranges can go, and arm64_tlbi_asid() can be folded
> into the ttlb_flush() function. One additional fix there: my
> previous changes had made the asid == -1 branch unreachable. Since
> nothing broke, that suggests page table entries marked as global are
> flushed even when the flush targets a specific ASID. But the
> architecture manual isn't very explicit about this, so I reactivated
> the branch by changing the test to pm == pmap_kernel().
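The ordering this relies on, as I read it (a condensed sketch, not the
literal pmap.c code; the vp/pa arguments are schematic):

	/*
	 * A new l1/l2/l3 table is only ever installed on the way to
	 * entering a leaf PTE, and the leaf entry is followed by a
	 * TLB flush for that VA, which also drops whatever the walk
	 * caches held for the intermediate levels of that VA.
	 */
	pmap_set_l1(pm, va, vp1, l1_pa);	/* no flush needed */
	pmap_set_l2(pm, va, vp2, l2_pa);	/* no flush needed */
	pmap_set_l3(pm, va, vp3, l3_pa);	/* no flush needed */
	pmap_pte_insert(pted);			/* write the leaf PTE */
	ttlb_flush(pm, va & ~PAGE_MASK);	/* flushes the walk path */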
>
> In the end this doesn't change performance in any measurable way, but
> less code is always better, isn't it?
>
> ok?
>
>
> Index: arch/arm64/arm64/pmap.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/arm64/arm64/pmap.c,v
> retrieving revision 1.32
> diff -u -p -r1.32 pmap.c
> --- arch/arm64/arm64/pmap.c 13 Apr 2017 20:48:29 -0000 1.32
> +++ arch/arm64/arm64/pmap.c 14 Apr 2017 09:27:22 -0000
> @@ -35,7 +35,6 @@
> #include <ddb/db_output.h>
>
> void pmap_setttb(struct proc *p);
> -void arm64_tlbi_asid(vaddr_t va, int asid);
> void pmap_free_asid(pmap_t pm);
>
> /*
> @@ -68,34 +67,13 @@ pmap_pa_is_mem(uint64_t pa)
> static inline void
> ttlb_flush(pmap_t pm, vaddr_t va)
> {
> - arm64_tlbi_asid(va, pm->pm_asid);
> -}
> -
> -static inline void
> -ttlb_flush_range(pmap_t pm, vaddr_t va, vsize_t size)
> -{
> - vaddr_t eva = va + size;
> -
> - /* if size is over 512 pages, just flush the entire cache !?!?! */
> - if (size >= (512 * PAGE_SIZE)) {
> - cpu_tlb_flush();
> - return;
> - }
> -
> - for ( ; va < eva; va += PAGE_SIZE)
> - arm64_tlbi_asid(va, pm->pm_asid);
> -}
> -
> -void
> -arm64_tlbi_asid(vaddr_t va, int asid)
> -{
> vaddr_t resva;
>
> resva = ((va >> PAGE_SHIFT) & ((1ULL << 44) - 1));
> - if (asid == -1) {
> + if (pm == pmap_kernel()) {
> cpu_tlb_flush_all_asid(resva);
> } else {
> - resva |= (unsigned long long)asid << 48;
> + resva |= (uint64_t)pm->pm_asid << 48;
> cpu_tlb_flush_asid(resva);
> }
> }
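For the archive: the value built here follows the TLBI by-VA operand
format, VA[55:12] in bits [43:0] and the ASID in bits [63:48]. The two
primitives presumably expand to something like the sketch below (the
real inlines live further up in pmap.c); the all-ASIDs form side-steps
the open question above of whether the ASID-specific form is
guaranteed to hit global kernel entries.

	static inline void
	cpu_tlb_flush_asid(vaddr_t resva)
	{
		/* flush this VA for the ASID in bits 63:48 */
		__asm volatile("tlbi vae1is, %x0" :: "r" (resva));
	}

	static inline void
	cpu_tlb_flush_all_asid(vaddr_t resva)
	{
		/* flush this VA for all ASIDs, global entries included */
		__asm volatile("tlbi vaae1is, %x0" :: "r" (resva));
	}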
> @@ -1267,8 +1245,6 @@ pmap_set_l1(struct pmap *pm, uint64_t va
> idx0 = VP_IDX0(va);
> pm->pm_vp.l0->vp[idx0] = l1_va;
> pm->pm_vp.l0->l0[idx0] = pg_entry;
> -
> - ttlb_flush_range(pm, va & ~PAGE_MASK, 1<<VP_IDX1_POS);
> }
>
> void
> @@ -1299,8 +1275,6 @@ pmap_set_l2(struct pmap *pm, uint64_t va
> vp1 = pm->pm_vp.l1;
> vp1->vp[idx1] = l2_va;
> vp1->l1[idx1] = pg_entry;
> -
> - ttlb_flush_range(pm, va & ~PAGE_MASK, 1<<VP_IDX2_POS);
> }
>
> void
> @@ -1334,8 +1308,6 @@ pmap_set_l3(struct pmap *pm, uint64_t va
> vp2 = vp1->vp[idx1];
> vp2->vp[idx2] = l3_va;
> vp2->l2[idx2] = pg_entry;
> -
> - ttlb_flush_range(pm, va & ~PAGE_MASK, 1<<VP_IDX3_POS);
> }
>
> /*
> @@ -1578,7 +1550,7 @@ pmap_pte_remove(struct pte_desc *pted, i
> if (remove_pted)
> vp3->vp[VP_IDX3(pted->pted_va)] = NULL;
>
> - arm64_tlbi_asid(pted->pted_va, pm->pm_asid);
> + ttlb_flush(pm, pted->pted_va);
Other callers pass pted->pted_va & ~PAGE_MASK, but since ttlb_flush()
does va >> PAGE_SHIFT, the masking probably isn't needed anyway; it
might make sense to remove it from the other calls as well. As far as
I know it was only there to make explicit that we pass a VA and
discard the attributes stored in the lower bits.
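(Concretely, since PAGE_MASK is (1UL << PAGE_SHIFT) - 1, the shift
already discards everything the mask would clear, so for any va:

	((va & ~PAGE_MASK) >> PAGE_SHIFT) == (va >> PAGE_SHIFT)

making the masking at the call sites pure documentation.)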
Diff doesn't make it any worse on my Seattle, ok patrick@.
Patrick
> }
>
> /*
>