On Wed, Aug 24, 2016 at 11:51:56AM +0200, Mark Kettenis wrote:
> The pmap_fault_fixup() functions does a number of different fixups:
>
> 1. page modified emulation
>
> 2. page access emulation
>
> 3. entering level-1 entries for level-2 page tables
>
> Since it is a generic function that gets called for many fault types,
> it has to be careful to only enter mappings that are actually allowed
> by the page protection bits. This duplicates some of the logic that
> uvm_fault() already implements. And I'm not entirely sure that there
> isn't a corner case where it gets things wrong.
>
> Fortunately the changes I made to other parts of the pmap make a lot
> of the work that pmap_fault_fixup() does unnecessary. Since
> pmap_enter() now properly enters writable mappings, the page modified
> handling can simply rely on uvm_fault() doing the right thing. That's
> how things work on other architectures. The only thing we really need
> to do is to set the access flag on level-2 page table entries. And
> there is a separate fault type for that!
>
> So the diff below removes pmap_fault_fixup(), and replaces it with a
> dab_access() function that just handles the access flag fault. This
> makes the #ifdef spaghetti in fault.c slightly worse, but I think
> that's worth it. Perhaps we should introduce a fault7.c at some
> point. Or remove support for zaurus when the time comes.
>
> I sprinkled quite a few KASSERTs into dab_access(). These all check
> conditions that shouldn't happen on a uniprocessor kernel. For now, I
> want to crash hard when we hit one of those. When we add SMP support,
> some of these will need to be relaxed.
>
> ok?
>
Yes, please. pmap_fault_fixup() is/was a source of corner case bugs.
-Artturi
>
> Index: arch/arm/arm/fault.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/arm/arm/fault.c,v
> retrieving revision 1.22
> diff -u -p -r1.22 fault.c
> --- arch/arm/arm/fault.c 19 Aug 2016 19:07:37 -0000 1.22
> +++ arch/arm/arm/fault.c 24 Aug 2016 09:31:24 -0000
> @@ -131,6 +131,8 @@ static int dab_align(trapframe_t *, u_in
> struct sigdata *sd);
> static int dab_buserr(trapframe_t *, u_int, u_int, struct proc *,
> struct sigdata *sd);
> +extern int dab_access(trapframe_t *, u_int, u_int, struct proc *,
> + struct sigdata *sd);
>
> static const struct data_abort data_aborts[] = {
> #ifndef CPU_ARMv7
> @@ -154,10 +156,10 @@ static const struct data_abort data_abor
> {dab_fatal, "V7 fault 00000"},
> {dab_align, "Alignment fault"},
> {dab_fatal, "Debug event"},
> - {NULL, "Access flag fault (L1)"},
> + {dab_fatal, "Access flag fault (L1)"},
> {dab_buserr, "Fault on instruction cache maintenance"},
> {NULL, "Translation fault (L1)"},
> - {NULL, "Access flag fault (L2)"},
> + {dab_access, "Access flag fault (L2)"},
> {NULL, "Translation fault (L2)"},
> {dab_buserr, "Synchronous external abort"},
> {NULL, "Domain fault (L1)"},
> @@ -361,6 +363,7 @@ data_abort_handler(trapframe_t *tf)
> ftype = fsr & FAULT_WNR ? PROT_WRITE : PROT_READ;
> #endif
>
> +#ifndef CPU_ARMv7
> /*
> * See if the fault is as a result of ref/mod emulation,
> * or domain mismatch.
> @@ -375,6 +378,7 @@ data_abort_handler(trapframe_t *tf)
> #endif
> goto out;
> }
> +#endif
>
> if (__predict_false(curcpu()->ci_idepth > 0)) {
> if (pcb->pcb_onfault) {
> @@ -668,8 +672,17 @@ prefetch_abort_handler(trapframe_t *tf)
> if (__predict_true((tf->tf_spsr & PSR_I) == 0))
> enable_interrupts(PSR_I);
>
> - /* Get fault address */
> + /* Get the current proc structure */
> p = curproc;
> +
> +#ifdef CPU_ARMv7
> + /* Invoke access fault handler if appropriate */
> + if (FAULT_TYPE_V7(fsr) == FAULT_ACCESS_2) {
> + dab_access(tf, fsr, far, p, NULL);
> + goto out;
> + }
> +#endif
> +
> p->p_addr->u_pcb.pcb_tf = tf;
>
> /* Ok validate the address, can only execute in USER space */
> @@ -683,6 +696,7 @@ prefetch_abort_handler(trapframe_t *tf)
> map = &p->p_vmspace->vm_map;
> va = trunc_page(far);
>
> +#ifndef CPU_ARMv7
> /*
> * See if the pmap can handle this fault on its own...
> */
> @@ -691,6 +705,7 @@ prefetch_abort_handler(trapframe_t *tf)
> #endif
> if (pmap_fault_fixup(map->pmap, va, PROT_READ | PROT_EXEC, 1))
> goto out;
> +#endif
>
> #ifdef DIAGNOSTIC
> if (__predict_false(curcpu()->ci_idepth > 0)) {
> Index: arch/arm/arm/pmap7.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/arm/arm/pmap7.c,v
> retrieving revision 1.46
> diff -u -p -r1.46 pmap7.c
> --- arch/arm/arm/pmap7.c 20 Aug 2016 21:08:16 -0000 1.46
> +++ arch/arm/arm/pmap7.c 24 Aug 2016 09:31:24 -0000
> @@ -1699,153 +1699,70 @@ pmap_clear_reference(struct vm_page *pg)
> */
> /* See <arm/pmap.h> */
>
> +/*
> + * dab_access() handles the following data aborts:
> + *
> + * FAULT_ACCESS_2 - Access flag fault -- Level 2
> + *
> + * Set the Access Flag and mark the page as referenced.
> + */
> int
> -pmap_fault_fixup(pmap_t pm, vaddr_t va, vm_prot_t ftype, int user)
> +dab_access(trapframe_t *tf, u_int fsr, u_int far, struct proc *p)
> {
> + struct pmap *pm = p->p_vmspace->vm_map.pmap;
> + vaddr_t va = trunc_page(far);
> struct l2_dtable *l2;
> struct l2_bucket *l2b;
> - pd_entry_t *pl1pd, l1pd;
> pt_entry_t *ptep, pte;
> + struct pv_entry *pv;
> + struct vm_page *pg;
> paddr_t pa;
> u_int l1idx;
> - int rv = 0;
>
> l1idx = L1_IDX(va);
>
> /*
> * If there is no l2_dtable for this address, then the process
> * has no business accessing it.
> - *
> - * Note: This will catch userland processes trying to access
> - * kernel addresses.
> */
> l2 = pm->pm_l2[L2_IDX(l1idx)];
> - if (l2 == NULL)
> - goto out;
> + KASSERT(l2 != NULL);
>
> /*
> * Likewise if there is no L2 descriptor table
> */
> l2b = &l2->l2_bucket[L2_BUCKET(l1idx)];
> - if (l2b->l2b_kva == NULL)
> - goto out;
> + KASSERT(l2b->l2b_kva != NULL);
>
> /*
> * Check the PTE itself.
> */
> ptep = &l2b->l2b_kva[l2pte_index(va)];
> pte = *ptep;
> - if (pte == L2_TYPE_INV)
> - goto out;
> -
> - if ((ftype & PROT_EXEC) && (pte & L2_V7_S_XN))
> - goto out;
> -
> - /* only if vectors are low ?? */
> - /*
> - * Catch a userland access to the vector page mapped at 0x0
> - */
> - if (user) {
> - if ((pte & L2_V7_AP(0x2)) == 0)
> - goto out;
> - }
> + KASSERT(pte != L2_TYPE_INV);
>
> pa = l2pte_pa(pte);
>
> - if ((ftype & PROT_WRITE) && !l2pte_is_writeable(pte, pm)) {
> - /*
> - * This looks like a good candidate for "page modified"
> - * emulation...
> - */
> - struct pv_entry *pv;
> - struct vm_page *pg;
> -
> - /* Extract the physical address of the page */
> - if ((pg = PHYS_TO_VM_PAGE(pa)) == NULL)
> - goto out;
> -
> - /* Get the current flags for this page. */
> - pv = pmap_find_pv(pg, pm, va);
> - if (pv == NULL)
> - goto out;
> -
> - /*
> - * Do the flags say this page is writable? If not then it
> - * is a genuine write fault. If yes then the write fault is
> - * our fault as we did not reflect the write access in the
> - * PTE. Now we know a write has occurred we can correct this
> - * and also set the modified bit
> - */
> - if ((pv->pv_flags & PVF_WRITE) == 0)
> - goto out;
> -
> - NPDEBUG(PDB_FOLLOW,
> - printf("pmap_fault_fixup: mod emul. pm %p, va 0x%08lx, pa
> 0x%08lx\n",
> - pm, va, pg->phys_addr));
> -
> - pg->mdpage.pvh_attrs |= PVF_REF | PVF_MOD;
> - pv->pv_flags |= PVF_REF | PVF_MOD;
> -
> - /*
> - * Re-enable write permissions for the page.
> - * We've already set the cacheable bits based on
> - * the assumption that we can write to this page.
> - */
> - *ptep = (pte & ~L2_V7_AP(0x4));
> - PTE_SYNC(ptep);
> - rv = 1;
> - } else if ((pte & L2_V7_AF) == 0) {
> - /*
> - * This looks like a good candidate for "page referenced"
> - * emulation.
> - */
> - struct pv_entry *pv;
> - struct vm_page *pg;
> -
> - /* Extract the physical address of the page */
> - if ((pg = PHYS_TO_VM_PAGE(pa)) == NULL)
> - goto out;
> -
> - /* Get the current flags for this page. */
> - pv = pmap_find_pv(pg, pm, va);
> - if (pv == NULL)
> - goto out;
> -
> - pg->mdpage.pvh_attrs |= PVF_REF;
> - pv->pv_flags |= PVF_REF;
> - pte |= L2_V7_AF;
> -
> - NPDEBUG(PDB_FOLLOW,
> - printf("pmap_fault_fixup: ref emul. pm %p, va 0x%08lx, pa
> 0x%08lx\n",
> - pm, va, pg->phys_addr));
> -
> - *ptep = pte;
> - PTE_SYNC(ptep);
> - rv = 1;
> - } else {
> -printf("%s: va %08lx ftype %x %c pte %08x\n", __func__, va, ftype, user ?
> 'u' : 's', pte);
> - goto out;
> - }
> -
> /*
> - * We know there is a valid mapping here, so simply
> - * fix up the L1 if necessary.
> + * Perform page referenced emulation.
> */
> - pl1pd = &pm->pm_l1->l1_kva[l1idx];
> - l1pd = l2b->l2b_phys | L1_C_DOM(pm->pm_domain) | L1_C_PROTO;
> - if (*pl1pd != l1pd) {
> - *pl1pd = l1pd;
> - PTE_SYNC(pl1pd);
> - rv = 1;
> - }
> -
> - if (rv) {
> - cpu_tlb_flushID_SE(va);
> - cpu_cpwait();
> - }
> + KASSERT((pte & L2_V7_AF) == 0);
>
> -out:
> - return (rv);
> + /* Extract the physical address of the page */
> + pg = PHYS_TO_VM_PAGE(pa);
> + KASSERT(pg != NULL);
> +
> + /* Get the current flags for this page. */
> + pv = pmap_find_pv(pg, pm, va);
> + KASSERT(pv != NULL);
> +
> + pg->mdpage.pvh_attrs |= PVF_REF;
> + pv->pv_flags |= PVF_REF;
> + pte |= L2_V7_AF;
> +
> + *ptep = pte;
> + PTE_SYNC(ptep);
> + return 0;
> }
>
> /*
> Index: arch/arm/include/armreg.h
> ===================================================================
> RCS file: /cvs/src/sys/arch/arm/include/armreg.h,v
> retrieving revision 1.35
> diff -u -p -r1.35 armreg.h
> --- arch/arm/include/armreg.h 14 Aug 2016 11:30:54 -0000 1.35
> +++ arch/arm/include/armreg.h 24 Aug 2016 09:31:24 -0000
> @@ -314,6 +314,10 @@
> #define FAULT_PERM_S 0x0d /* Permission -- Section */
> #define FAULT_PERM_P 0x0f /* Permission -- Page */
>
> +/* Fault type definitions for ARM v7 */
> +#define FAULT_ACCESS_1 0x03 /* Access flag fault -- Level 1 */
> +#define FAULT_ACCESS_2 0x06 /* Access flag fault -- Level 2 */
> +
> #define FAULT_IMPRECISE 0x400 /* Imprecise exception (XSCALE) */
>
> #define FAULT_EXT 0x00001000 /* external abort */
>