On Fri, Apr 14, 2017 at 11:51:52AM +0200, Mark Kettenis wrote:
> While reading the arm64 code the ranges covered by the tlb flushes in
> the code that sets the l1/l2/l3 tables made no sense to me.  I came to
> the conclusion that these flushes aren't necessary.  When we enter the
> final page table entry, we flush the tlb again, which flushes any
> cached information for the l1/l2/l3 tables anyway.  This means the
> code that flushes ranges can go, and arm64_tlbi_asid() can be folded
> into the ttlb_flush() function.  One additional fix there.  My
> previous changes made the asid == -1 branch unreachable.  I think that
> means that page table entries marked as global are actually flushed
> even if the flush targets a specific ASID.  But the architecture
> manual isn't very explicit about this.  So I reactivated the branch by
> changing the test to pm == pmap_kernel().
> 
> In the end this doesn't change the performance in a measurable way,
> but less code is always better, isn't it?
> 
> ok?
> 
> 
> Index: arch/arm64/arm64/pmap.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/arm64/arm64/pmap.c,v
> retrieving revision 1.32
> diff -u -p -r1.32 pmap.c
> --- arch/arm64/arm64/pmap.c   13 Apr 2017 20:48:29 -0000      1.32
> +++ arch/arm64/arm64/pmap.c   14 Apr 2017 09:27:22 -0000
> @@ -35,7 +35,6 @@
>  #include <ddb/db_output.h>
>  
>  void pmap_setttb(struct proc *p);
> -void arm64_tlbi_asid(vaddr_t va, int asid);
>  void pmap_free_asid(pmap_t pm);
>  
>  /*
> @@ -68,34 +67,13 @@ pmap_pa_is_mem(uint64_t pa)
>  static inline void
>  ttlb_flush(pmap_t pm, vaddr_t va)
>  {
> -     arm64_tlbi_asid(va, pm->pm_asid);
> -}
> -
> -static inline void
> -ttlb_flush_range(pmap_t pm, vaddr_t va, vsize_t size)
> -{
> -     vaddr_t eva = va + size;
> -
> -     /* if size is over 512 pages, just flush the entire cache !?!?! */
> -     if (size >= (512 * PAGE_SIZE)) {
> -             cpu_tlb_flush();
> -             return;
> -     }
> -
> -     for ( ; va < eva; va += PAGE_SIZE)
> -             arm64_tlbi_asid(va, pm->pm_asid);
> -}
> -
> -void
> -arm64_tlbi_asid(vaddr_t va, int asid)
> -{
>       vaddr_t resva;
>  
>       resva = ((va >> PAGE_SHIFT) & ((1ULL << 44) - 1));
> -     if (asid == -1) {
> +     if (pm == pmap_kernel()) {
>               cpu_tlb_flush_all_asid(resva);
>       } else {
> -             resva |= (unsigned long long)asid << 48;
> +             resva |= (uint64_t)pm->pm_asid << 48;
>               cpu_tlb_flush_asid(resva);
>       }
>  }
> @@ -1267,8 +1245,6 @@ pmap_set_l1(struct pmap *pm, uint64_t va
>       idx0 = VP_IDX0(va);
>       pm->pm_vp.l0->vp[idx0] = l1_va;
>       pm->pm_vp.l0->l0[idx0] = pg_entry;
> -
> -     ttlb_flush_range(pm, va & ~PAGE_MASK, 1<<VP_IDX1_POS);
>  }
>  
>  void
> @@ -1299,8 +1275,6 @@ pmap_set_l2(struct pmap *pm, uint64_t va
>               vp1 = pm->pm_vp.l1;
>       vp1->vp[idx1] = l2_va;
>       vp1->l1[idx1] = pg_entry;
> -
> -     ttlb_flush_range(pm, va & ~PAGE_MASK, 1<<VP_IDX2_POS);
>  }
>  
>  void
> @@ -1334,8 +1308,6 @@ pmap_set_l3(struct pmap *pm, uint64_t va
>       vp2 = vp1->vp[idx1];
>       vp2->vp[idx2] = l3_va;
>       vp2->l2[idx2] = pg_entry;
> -
> -     ttlb_flush_range(pm, va & ~PAGE_MASK, 1<<VP_IDX3_POS);
>  }
>  
>  /*
> @@ -1578,7 +1550,7 @@ pmap_pte_remove(struct pte_desc *pted, i
>       if (remove_pted)
>               vp3->vp[VP_IDX3(pted->pted_va)] = NULL;
>  
> -     arm64_tlbi_asid(pted->pted_va, pm->pm_asid);
> +     ttlb_flush(pm, pted->pted_va);

Other callers pass pted->pted_va & ~PAGE_MASK, but since ttlb_flush()
already does va >> PAGE_SHIFT, the masking probably isn't needed
anyway.  It might make sense to remove it from the other calls as
well.  Afaik it was simply there to be explicit that we pass a VA and
skip the attributes stored in its lower bits.
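
A minimal sketch of why the masking is redundant (assuming the usual
OpenBSD definition of PAGE_MASK as PAGE_SIZE - 1; pted_va below just
stands in for pted->pted_va):

	vaddr_t va = pted_va;	/* VA with attribute bits in the low bits */
	/* round down to the page boundary first, then shift */
	uint64_t masked  = ((va & ~PAGE_MASK) >> PAGE_SHIFT) & ((1ULL << 44) - 1);
	/* shift directly */
	uint64_t shifted = (va >> PAGE_SHIFT) & ((1ULL << 44) - 1);
	/*
	 * masked == shifted: the bits cleared by ~PAGE_MASK are exactly
	 * the bits discarded by the shift, so the TLBI operand built in
	 * ttlb_flush() comes out the same either way.
	 */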

Diff doesn't make it any worse on my Seattle, ok patrick@.

Patrick

>  }
>  
>  /*
> 
