Module Name:    src
Committed By:   jdolecek
Date:           Sat Feb  6 21:24:20 UTC 2021

Modified Files:
        src/sys/arch/x86/x86: pmap.c

Log Message:
use __builtin_assume_aligned() in places where the memset() or memcpy()
parameters are known to be PAGE_SIZE-aligned, so the compiler doesn't need
to emit atrocious alignment-check code when inlining them
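
For reference, a minimal standalone sketch of the pattern (PAGE_SIZE is
hard-coded to the 4096-byte x86 page size purely for illustration, and
zero_page() is an illustrative name; the real macro is the PAGE_ALIGNED()
added in the diff below):

	#include <string.h>

	#define PAGE_SIZE	4096

	/* Promise the compiler that pp is PAGE_SIZE-aligned. */
	#define PAGE_ALIGNED(pp) \
		__builtin_assume_aligned((void *)(pp), PAGE_SIZE)

	/*
	 * With the alignment hint, the compiler can inline the memset()
	 * without emitting the prologue/epilogue code that handles
	 * misaligned head and tail bytes.
	 */
	static void
	zero_page(void *page)
	{
		memset(PAGE_ALIGNED(page), 0, PAGE_SIZE);
	}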

this particularly improves pmap_zero_page() and pmap_copy_page(),
which are now reduced to just 'rep stosq', and are close to what hand-written
assembly would do

Note: on CPUs supporting ERMS, 'rep stosb' would still be slightly faster, but
this is a solid stop-gap improvement
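
For comparison, a hypothetical ERMS-style variant would replace the
quadword loop with a byte-granular 'rep stosb' (GCC inline assembly,
x86-64 only; zero_page_erms() is an illustrative name, not an existing
kernel function):

	#include <stddef.h>

	#define PAGE_SIZE	4096

	static inline void
	zero_page_erms(void *page)
	{
		void *d = page;
		size_t n = PAGE_SIZE;

		/*
		 * RDI = destination, RCX = byte count, AL = fill byte.
		 * The SysV ABI guarantees the direction flag is clear.
		 */
		__asm__ volatile("rep stosb"
		    : "+D" (d), "+c" (n)
		    : "a" (0)
		    : "memory");
	}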

suggested by Mateusz Guzik in:
http://mail-index.netbsd.org/tech-kern/2020/07/19/msg026620.html


To generate a diff of this commit:
cvs rdiff -u -r1.408 -r1.409 src/sys/arch/x86/x86/pmap.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/x86/x86/pmap.c
diff -u src/sys/arch/x86/x86/pmap.c:1.408 src/sys/arch/x86/x86/pmap.c:1.409
--- src/sys/arch/x86/x86/pmap.c:1.408	Mon Nov 30 17:06:02 2020
+++ src/sys/arch/x86/x86/pmap.c	Sat Feb  6 21:24:19 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap.c,v 1.408 2020/11/30 17:06:02 bouyer Exp $	*/
+/*	$NetBSD: pmap.c,v 1.409 2021/02/06 21:24:19 jdolecek Exp $	*/
 
 /*
  * Copyright (c) 2008, 2010, 2016, 2017, 2019, 2020 The NetBSD Foundation, Inc.
@@ -130,7 +130,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.408 2020/11/30 17:06:02 bouyer Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.409 2021/02/06 21:24:19 jdolecek Exp $");
 
 #include "opt_user_ldt.h"
 #include "opt_lockdebug.h"
@@ -339,6 +339,9 @@ paddr_t pmap_pa_end;   /* PA of last phy
 #define	PMAP_CHECK_PP(pp) \
     KASSERTMSG((pp)->pp_lock.mtx_ipl._ipl == IPL_VM, "bad pmap_page %p", pp)
 
+#define PAGE_ALIGNED(pp)	\
+	__builtin_assume_aligned((void *)(pp), PAGE_SIZE)
+
 /*
  * Other data structures
  */
@@ -1297,7 +1300,7 @@ pmap_bootstrap(vaddr_t kva_start)
 	xen_dummy_user_pgd = xen_dummy_page - KERNBASE;
 
 	/* Zero fill it, the less checks in Xen it requires the better */
-	memset((void *)(xen_dummy_user_pgd + KERNBASE), 0, PAGE_SIZE);
+	memset(PAGE_ALIGNED(xen_dummy_user_pgd + KERNBASE), 0, PAGE_SIZE);
 	/* Mark read-only */
 	HYPERVISOR_update_va_mapping(xen_dummy_user_pgd + KERNBASE,
 	    pmap_pa2pte(xen_dummy_user_pgd) | PTE_P | pmap_pg_nx,
@@ -1542,7 +1545,7 @@ pmap_init_pcpu(void)
 		pa = pmap_bootstrap_palloc(1);
 		*pte = (pa & PTE_FRAME) | pteflags;
 		pmap_update_pg(tmpva);
-		memset((void *)tmpva, 0, PAGE_SIZE);
+		memset(PAGE_ALIGNED(tmpva), 0, PAGE_SIZE);
 
 		L4_BASE[L4e_idx+i] = pa | pteflags | PTE_A;
 	}
@@ -1556,7 +1559,7 @@ pmap_init_pcpu(void)
 		pa = pmap_bootstrap_palloc(1);
 		*pte = (pa & PTE_FRAME) | pteflags;
 		pmap_update_pg(tmpva);
-		memset((void *)tmpva, 0, PAGE_SIZE);
+		memset(PAGE_ALIGNED(tmpva), 0, PAGE_SIZE);
 
 		L3_BASE[L3e_idx+i] = pa | pteflags | PTE_A;
 	}
@@ -1571,7 +1574,7 @@ pmap_init_pcpu(void)
 		pa = pmap_bootstrap_palloc(1);
 		*pte = (pa & PTE_FRAME) | pteflags;
 		pmap_update_pg(tmpva);
-		memset((void *)tmpva, 0, PAGE_SIZE);
+		memset(PAGE_ALIGNED(tmpva), 0, PAGE_SIZE);
 
 		L2_BASE[L2e_idx+i] = pa | pteflags | PTE_A;
 	}
@@ -1663,7 +1666,7 @@ pmap_init_directmap(struct pmap *kpm)
 		pa = pmap_bootstrap_palloc(1);
 		*pte = (pa & PTE_FRAME) | pteflags;
 		pmap_update_pg(tmpva);
-		memset((void *)tmpva, 0, PAGE_SIZE);
+		memset(PAGE_ALIGNED(tmpva), 0, PAGE_SIZE);
 
 		L4_BASE[L4e_idx+i] = pa | pteflags | PTE_A;
 	}
@@ -1677,7 +1680,7 @@ pmap_init_directmap(struct pmap *kpm)
 		pa = pmap_bootstrap_palloc(1);
 		*pte = (pa & PTE_FRAME) | pteflags;
 		pmap_update_pg(tmpva);
-		memset((void *)tmpva, 0, PAGE_SIZE);
+		memset(PAGE_ALIGNED(tmpva), 0, PAGE_SIZE);
 
 		L3_BASE[L3e_idx+i] = pa | pteflags | PTE_A;
 	}
@@ -2627,7 +2630,7 @@ pmap_pdp_init(pd_entry_t *pdir)
 	int s;
 #endif
 
-	memset(pdir, 0, PDP_SIZE * PAGE_SIZE);
+	memset(PAGE_ALIGNED(pdir), 0, PDP_SIZE * PAGE_SIZE);
 
 	/*
 	 * NOTE: This is all done unlocked, but we will check afterwards
@@ -3805,7 +3808,7 @@ void
 pmap_zero_page(paddr_t pa)
 {
 #if defined(__HAVE_DIRECT_MAP)
-	memset((void *)PMAP_DIRECT_MAP(pa), 0, PAGE_SIZE);
+	memset(PAGE_ALIGNED(PMAP_DIRECT_MAP(pa)), 0, PAGE_SIZE);
 #else
 #if defined(XENPV)
 	if (XEN_VERSION_SUPPORTED(3, 4))
@@ -3829,7 +3832,7 @@ pmap_zero_page(paddr_t pa)
 	pmap_pte_flush();
 	pmap_update_pg(zerova);		/* flush TLB */
 
-	memset((void *)zerova, 0, PAGE_SIZE);
+	memset(PAGE_ALIGNED(zerova), 0, PAGE_SIZE);
 
 #if defined(DIAGNOSTIC) || defined(XENPV)
 	pmap_pte_set(zpte, 0);				/* zap ! */
@@ -3847,7 +3850,7 @@ pmap_copy_page(paddr_t srcpa, paddr_t ds
 	vaddr_t srcva = PMAP_DIRECT_MAP(srcpa);
 	vaddr_t dstva = PMAP_DIRECT_MAP(dstpa);
 
-	memcpy((void *)dstva, (void *)srcva, PAGE_SIZE);
+	memcpy(PAGE_ALIGNED(dstva), PAGE_ALIGNED(srcva), PAGE_SIZE);
 #else
 #if defined(XENPV)
 	if (XEN_VERSION_SUPPORTED(3, 4)) {
@@ -3877,7 +3880,7 @@ pmap_copy_page(paddr_t srcpa, paddr_t ds
 	pmap_update_pg(srcva);
 	pmap_update_pg(dstva);
 
-	memcpy((void *)dstva, (void *)srcva, PAGE_SIZE);
+	memcpy(PAGE_ALIGNED(dstva), PAGE_ALIGNED(srcva), PAGE_SIZE);
 
 #if defined(DIAGNOSTIC) || defined(XENPV)
 	pmap_pte_set(srcpte, 0);
@@ -5403,7 +5406,7 @@ pmap_get_physpage(void)
 		if (!uvm_page_physget(&pa))
 			panic("%s: out of memory", __func__);
 #if defined(__HAVE_DIRECT_MAP)
-		memset((void *)PMAP_DIRECT_MAP(pa), 0, PAGE_SIZE);
+		memset(PAGE_ALIGNED(PMAP_DIRECT_MAP(pa)), 0, PAGE_SIZE);
 #else
 #if defined(XENPV)
 		if (XEN_VERSION_SUPPORTED(3, 4)) {
@@ -5416,7 +5419,7 @@ pmap_get_physpage(void)
 		    PTE_W | pmap_pg_nx);
 		pmap_pte_flush();
 		pmap_update_pg((vaddr_t)early_zerop);
-		memset(early_zerop, 0, PAGE_SIZE);
+		memset(PAGE_ALIGNED(early_zerop), 0, PAGE_SIZE);
 #if defined(DIAGNOSTIC) || defined(XENPV)
 		pmap_pte_set(early_zero_pte, 0);
 		pmap_pte_flush();
@@ -5782,13 +5785,13 @@ pmap_init_tmp_pgtbl(paddr_t pg)
 	/* Zero levels 1-3 */
 	for (level = 0; level < PTP_LEVELS - 1; ++level) {
 		tmp_pml = (void *)x86_tmp_pml_vaddr[level];
-		memset(tmp_pml, 0, PAGE_SIZE);
+		memset(PAGE_ALIGNED(tmp_pml), 0, PAGE_SIZE);
 	}
 
 	/* Copy PML4 */
 	kernel_pml = pmap_kernel()->pm_pdir;
 	tmp_pml = (void *)x86_tmp_pml_vaddr[PTP_LEVELS - 1];
-	memcpy(tmp_pml, kernel_pml, PAGE_SIZE);
+	memcpy(PAGE_ALIGNED(tmp_pml), PAGE_ALIGNED(kernel_pml), PAGE_SIZE);
 
 #ifdef PAE
 	/*
@@ -6701,7 +6704,7 @@ pmap_ept_transform(struct pmap *pmap)
 	pmap->pm_write_protect = pmap_ept_write_protect;
 	pmap->pm_unwire = pmap_ept_unwire;
 
-	memset(pmap->pm_pdir, 0, PAGE_SIZE);
+	memset(PAGE_ALIGNED(pmap->pm_pdir), 0, PAGE_SIZE);
 }
 
 #endif /* __HAVE_DIRECT_MAP && __x86_64__ && !XENPV */
