Module Name: src Committed By: maxv Date: Thu Jun 15 06:32:52 UTC 2017
Modified Files: src/sys/arch/x86/x86: pmap.c Log Message: Reorder these loops to reduce the number of enter->flush cycles. I figured out yesterday that this has a clear impact: a system with 16TB of hard-coded RAM has a 4-second black screen when booting. Now we're down to < 0.5s. It could be optimized more, but verily I don't have a machine with P1GB right now. To generate a diff of this commit: cvs rdiff -u -r1.246 -r1.247 src/sys/arch/x86/x86/pmap.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/arch/x86/x86/pmap.c diff -u src/sys/arch/x86/x86/pmap.c:1.246 src/sys/arch/x86/x86/pmap.c:1.247 --- src/sys/arch/x86/x86/pmap.c:1.246 Wed Jun 14 14:17:15 2017 +++ src/sys/arch/x86/x86/pmap.c Thu Jun 15 06:32:52 2017 @@ -1,4 +1,4 @@ -/* $NetBSD: pmap.c,v 1.246 2017/06/14 14:17:15 maxv Exp $ */ +/* $NetBSD: pmap.c,v 1.247 2017/06/15 06:32:52 maxv Exp $ */ /* * Copyright (c) 2008, 2010, 2016, 2017 The NetBSD Foundation, Inc. @@ -171,7 +171,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.246 2017/06/14 14:17:15 maxv Exp $"); +__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.247 2017/06/15 06:32:52 maxv Exp $"); #include "opt_user_ldt.h" #include "opt_lockdebug.h" @@ -1443,8 +1443,8 @@ pmap_init_directmap(struct pmap *kpm) pt_entry_t *pte; pd_entry_t *pde; phys_ram_seg_t *mc; - size_t nL4e, nL3e; - int i; + size_t nL4e, nL3e, pn, npd; + int i, n; const pd_entry_t pteflags = PG_V | PG_KW | pmap_pg_nx; @@ -1503,36 +1503,37 @@ pmap_init_directmap(struct pmap *kpm) /* Allocate L2. */ L2page_pa = pmap_bootstrap_palloc(nL3e); - /* Zero out the L2 pages. */ + KASSERT(pmap_largepages != 0); + + /* Large pages are supported. Just create L2. */ for (i = 0; i < nL3e; i++) { pdp = L2page_pa + i * PAGE_SIZE; *pte = (pdp & PG_FRAME) | pteflags; pmap_update_pg(tmpva); memset((void *)tmpva, 0, PAGE_SIZE); - } - KASSERT(pmap_largepages != 0); - - /* Large pages are supported. Just create L2. */ - for (i = 0; i < NPDPG * nL3e; i++) { - pdp = (paddr_t)&(((pd_entry_t *)L2page_pa)[i]); - *pte = (pdp & PG_FRAME) | pteflags; - pmap_update_pg(tmpva); - - pde = (pd_entry_t *)(tmpva + (pdp & ~PG_FRAME)); - *pde = ((paddr_t)i << L2_SHIFT) | pteflags | - PG_U | PG_PS | PG_G; + pde = (pd_entry_t *)tmpva; + for (n = 0; n < NPDPG; n++) { + pn = (i * NPDPG) + n; + pde[n] = ((paddr_t)pn << L2_SHIFT) | pteflags | + PG_U | PG_PS | PG_G; + } } /* Fill in the L3 entries, linked to L2. 
*/ - for (i = 0; i < nL3e; i++) { - pdp = (paddr_t)&(((pd_entry_t *)L3page_pa)[i]); + for (i = 0; i < nL4e; i++) { + pdp = L3page_pa + i * PAGE_SIZE; *pte = (pdp & PG_FRAME) | pteflags; pmap_update_pg(tmpva); - pde = (pd_entry_t *)(tmpva + (pdp & ~PG_FRAME)); - *pde = (L2page_pa + (i << PAGE_SHIFT)) | pteflags | PG_U; + pde = (pd_entry_t *)tmpva; + npd = (i == nL4e - 1) ? (nL3e % NPDPG) : NPDPG; + for (n = 0; n < npd; n++) { + pn = (i * NPDPG) + n; + pde[n] = (L2page_pa + (pn << PAGE_SHIFT)) | + pteflags | PG_U; + } } }