Another armv7 pmap diff

2016-08-20 Thread Mark Kettenis
The current pmap_enter() implementation always enters PROT_WRITE
mappings as read-only and relies on pmap_fault_fixup() to fix this up.
This is obviously not the intention, since we do go through the
trouble of checking whether we have to do modified bit emulation.

The diff below fixes this.

ok?


Index: pmap7.c
===
RCS file: /cvs/src/sys/arch/arm/arm/pmap7.c,v
retrieving revision 1.42
diff -u -p -r1.42 pmap7.c
--- pmap7.c 19 Aug 2016 17:31:04 -  1.42
+++ pmap7.c 20 Aug 2016 08:47:19 -
@@ -1165,9 +1165,8 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
nflags |= PVF_REF;
npte |= L2_V7_AF;
 
-   if ((prot & PROT_WRITE) != 0 &&
-   ((flags & PROT_WRITE) != 0 ||
-(pg->mdpage.pvh_attrs & PVF_MOD) != 0)) {
+   if ((flags & PROT_WRITE) ||
+(pg->mdpage.pvh_attrs & PVF_MOD)) {
/*
 * This is a writable mapping, and the
 * page's mod state indicates it has
@@ -1175,6 +1174,8 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
 * writable from the outset.
 */
nflags |= PVF_MOD;
+   } else {
+   prot &= ~PROT_WRITE;
}
} else {
/*
@@ -1241,8 +1242,7 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
/*
 * Make sure userland mappings get the right permissions
 */
-   npte |= L2_S_PROT(pm == pmap_kernel() ?  PTE_KERNEL : PTE_USER,
-   prot & ~PROT_WRITE);
+   npte |= L2_S_PROT(pm == pmap_kernel() ?  PTE_KERNEL : PTE_USER, prot);
 
/*
 * Keep the stats up to date



Re: Another armv7 pmap diff

2016-08-07 Thread Marcus Glocker
On Sun, Aug 07, 2016 at 01:58:06PM +0200, Mark Kettenis wrote:

> The ARMv7 ARM explicitly states that all data caches in the system are
> effectively PIPT.  This effectively means that there is no reason to
> clean or invalidate data caches except when:
> 
> 1. We need to synchronize the instruction cache with the data cache.
> 
> 2. We change a page to be non-cachable.
> 
> 3. We're invoking bus_dmamap_sync() for cachable pages.
> 
> 4. We're synching PTEs on a processor that doesn't coherently walk
>the page tables.
> 
> This means we can remove most cases where we clean or invalidate the
> data cache from pmap7.c.
> 
> The following diff seems to work fine on Cortex-A9 and shaves another
> minute and a half from the kernel build time.
> 
> Further testing, especially on Cortex-A8, would be appreciated.

No issues noticed on Cortex-A8.  Kernel build remains at ~19m.



Re: Another armv7 pmap diff

2016-08-07 Thread Daniel Bolgheroni
On Sun, Aug 07, 2016 at 01:58:06PM +0200, Mark Kettenis wrote:
> Further testing, especially on Cortex-A8, would be appreciated.

Works for me. Tested on BeagleBone Black. Kernel compile times remain
pretty much the same, though.

-- 
db



Another armv7 pmap diff

2016-08-07 Thread Mark Kettenis
The ARMv7 ARM explicitly states that all data caches in the system are
effectively PIPT.  This effectively means that there is no reason to
clean or invalidate data caches except when:

1. We need to synchronize the instruction cache with the data cache.

2. We change a page to be non-cachable.

3. We're invoking bus_dmamap_sync() for cachable pages.

4. We're synching PTEs on a processor that doesn't coherently walk
   the page tables.

This means we can remove most cases where we clean or invalidate the
data cache from pmap7.c.

The following diff seems to work fine on Cortex-A9 and shaves another
minute and a half from the kernel build time.

Further testing, especially on Cortex-A8, would be appreciated.


Index: arch/arm/arm/pmap7.c
===
RCS file: /cvs/src/sys/arch/arm/arm/pmap7.c,v
retrieving revision 1.33
diff -u -p -r1.33 pmap7.c
--- arch/arm/arm/pmap7.c6 Aug 2016 16:46:25 -   1.33
+++ arch/arm/arm/pmap7.c7 Aug 2016 11:46:17 -
@@ -917,9 +917,6 @@ pmap_l2ptp_ctor(void *v)
ptep = &l2b->l2b_kva[l2pte_index(va)];
pte = *ptep;
 
-   /* XXX redundant with PTE_SYNC_RANGE() ? */
-   cpu_idcache_wbinv_range(va, PAGE_SIZE);
-   cpu_sdcache_wbinv_range(va, pte & L2_S_FRAME, PAGE_SIZE);
if ((pte & L2_S_CACHE_MASK) != pte_l2_s_cache_mode_pt) {
*ptep = (pte & ~L2_S_CACHE_MASK) | pte_l2_s_cache_mode_pt;
PTE_SYNC(ptep);
@@ -952,6 +949,9 @@ pmap_uncache_page(paddr_t va, vaddr_t pa
PTE_SYNC(pte);
cpu_tlb_flushD_SE(va);
cpu_cpwait();
+
+   cpu_dcache_wbinv_range(va, PAGE_SIZE);
+   cpu_sdcache_wbinv_range(va, pa, PAGE_SIZE);
 }
 
 /*
@@ -1054,7 +1054,6 @@ pmap_clean_page(struct vm_page *pg, int 
 {
pmap_t pm;
struct pv_entry *pv;
-   boolean_t wb = FALSE;
 
/*
 * To save time, we are only walking the pv list if an I$ invalidation
@@ -1080,34 +1079,8 @@ pmap_clean_page(struct vm_page *pg, int 
 
if (PV_BEEN_EXECD(pv->pv_flags))
cpu_icache_sync_range(pv->pv_va, PAGE_SIZE);
-
-   /*
-* If we have not written back that page yet, do this
-* now while we still have a valid mapping for it.
-*/
-   if (!wb) {
-   cpu_dcache_wb_range(pv->pv_va, PAGE_SIZE);
-   cpu_sdcache_wb_range(pv->pv_va,
-   VM_PAGE_TO_PHYS(pg), PAGE_SIZE);
-   wb = TRUE;
-   }
}
}
-
-   /*
-* If there is no active mapping left, or we did not bother checking
-* for one, this does not mean the page doesn't have stale data. Map
-* it in a working page and writeback.
-*/
-   if (!wb) {
-   *cwb_pte = L2_S_PROTO | VM_PAGE_TO_PHYS(pg) |
-   L2_S_PROT(PTE_KERNEL, PROT_WRITE) | pte_l2_s_cache_mode;
-   PTE_SYNC(cwb_pte);
-   cpu_tlb_flushD_SE(cwbp);
-   cpu_cpwait();
-   cpu_dcache_wb_range(cwbp, PAGE_SIZE);
-   cpu_sdcache_wb_range(cwbp, VM_PAGE_TO_PHYS(pg), PAGE_SIZE);
-   }
 }
 
 /*
@@ -1152,15 +1125,6 @@ pmap_page_remove(struct vm_page *pg)
(pm == curpm || pm == pmap_kernel())) {
if (PV_BEEN_EXECD(pv->pv_flags))
cpu_icache_sync_range(pv->pv_va, 
PAGE_SIZE);
-   if (flush == FALSE) {
-   paddr_t pa;
-   cpu_dcache_wb_range(pv->pv_va,
-   PAGE_SIZE);
-   if (pmap_extract(pm, (vaddr_t)pv->pv_va,
-   &pa))
-   cpu_sdcache_wb_range(pv->pv_va,
-   pa, PAGE_SIZE);
-   }
flush = TRUE;
}
 
@@ -1338,18 +1302,6 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
oflags = pmap_modify_pv(pg, pm, va,
PVF_WRITE | PVF_EXEC | PVF_WIRED |
PVF_MOD | PVF_REF, nflags);
-
-   /*
-* We may need to flush the cache if we're
-* doing rw-ro...
-*/
-   if ((oflags & PVF_NC) == 0 &&
-   l2pte_is_writeable(opte, pm) &&
-   (prot & PROT_WRITE) == 0) {
-   cpu_dcache_wb_range(va, PAGE_SIZE);
-   cpu_sdcache_wb_range(va, opte & L2_S_FRAME,
-