Module Name:    src
Committed By:   maxv
Date:           Tue Dec 20 14:03:15 UTC 2016

Modified Files:
        src/sys/arch/amd64/amd64: machdep.c
        src/sys/arch/i386/i386: machdep.c
        src/sys/arch/x86/x86: pmap.c x86_machdep.c

Log Message:
When the i386 port was designed, the bootstrap code needed little physical
memory, and taking it below the kernel image was fine: we had 160 free
pages, and never allocated more than 20. With amd64 however, we create a
direct map, and for this map we need a number of page table pages that is
mostly proportionate to the number of physical addresses available, which
implies that these 160 free pages may not be enough.

In particular, if the CPU does not support 1GB superpages, each 1GB chunk
of physical memory needs a 4k page in the direct map, which means that if
a machine has 160GB of ram, the bootstrap code allocates more than 160
pages, thereby overwriting the I/O mem area. If we push a little further,
if a machine has 512GB of ram, we allocate ~525 pages, and start
overwriting the kernel text, causing the system to go crazy at boot time.

Fix this by moving the physical allocation area from below the kernel to above
it. avail_start is now beyond the kernel, and lowmem_rsvd indicates the
reserved low-memory pages. The area [lowmem_rsvd; IOM_BEGIN[ is
internalized into UVM, so there is no pa loss.

The only limit now is the pa of LAPIC, which is located at ~4GB of memory,
so it is perfectly fine.

This change theoretically adds va support for 512GB of ram, and it is a
prerequisite if we want to support more memory anyway.


To generate a diff of this commit:
cvs rdiff -u -r1.239 -r1.240 src/sys/arch/amd64/amd64/machdep.c
cvs rdiff -u -r1.769 -r1.770 src/sys/arch/i386/i386/machdep.c
cvs rdiff -u -r1.233 -r1.234 src/sys/arch/x86/x86/pmap.c
cvs rdiff -u -r1.78 -r1.79 src/sys/arch/x86/x86/x86_machdep.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/amd64/amd64/machdep.c
diff -u src/sys/arch/amd64/amd64/machdep.c:1.239 src/sys/arch/amd64/amd64/machdep.c:1.240
--- src/sys/arch/amd64/amd64/machdep.c:1.239	Fri Dec 16 20:16:50 2016
+++ src/sys/arch/amd64/amd64/machdep.c	Tue Dec 20 14:03:15 2016
@@ -1,4 +1,4 @@
-/*	$NetBSD: machdep.c,v 1.239 2016/12/16 20:16:50 maxv Exp $	*/
+/*	$NetBSD: machdep.c,v 1.240 2016/12/20 14:03:15 maxv Exp $	*/
 
 /*-
  * Copyright (c) 1996, 1997, 1998, 2000, 2006, 2007, 2008, 2011
@@ -111,7 +111,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.239 2016/12/16 20:16:50 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.240 2016/12/20 14:03:15 maxv Exp $");
 
 /* #define XENDEBUG_LOW  */
 
@@ -276,6 +276,7 @@ vaddr_t kern_end;
 
 struct vm_map *phys_map = NULL;
 
+extern paddr_t lowmem_rsvd;
 extern paddr_t avail_start, avail_end;
 #ifdef XEN
 extern paddr_t pmap_pa_start, pmap_pa_end;
@@ -1584,6 +1585,8 @@ init_x86_64(paddr_t first_avail)
 
 	uvmexp.ncolors = 2;
 
+	avail_start = first_avail;
+
 #ifndef XEN
 	/*
 	 * Low memory reservations:
@@ -1596,22 +1599,18 @@ init_x86_64(paddr_t first_avail)
 	 * Page 6:	Temporary page map level 3
 	 * Page 7:	Temporary page map level 4
 	 */
-	avail_start = 8 * PAGE_SIZE;
+	lowmem_rsvd = 8 * PAGE_SIZE;
 
 	/* Initialize the memory clusters (needed in pmap_boostrap). */
 	init_x86_clusters();
-#else	/* XEN */
+#else
 	/* Parse Xen command line (replace bootinfo) */
 	xen_parse_cmdline(XEN_PARSE_BOOTFLAGS, NULL);
 
-	/* Determine physical address space */
-	avail_start = first_avail;
 	avail_end = ctob(xen_start_info.nr_pages);
 	pmap_pa_start = (KERNTEXTOFF - KERNBASE);
 	pmap_pa_end = avail_end;
-	__PRINTK(("pmap_pa_start 0x%lx avail_start 0x%lx avail_end 0x%lx\n",
-	    pmap_pa_start, avail_start, avail_end));
-#endif	/* !XEN */
+#endif
 
 	/* End of the virtual space we have created so far. */
 	kern_end = (vaddr_t)atdevbase + IOM_SIZE;
@@ -1628,14 +1627,12 @@ init_x86_64(paddr_t first_avail)
 
 #ifndef XEN
 	/* Internalize the physical pages into the VM system. */
-	init_x86_vm(first_avail);
-#else	/* XEN */
+	init_x86_vm(avail_start);
+#else
 	physmem = xen_start_info.nr_pages;
-
-	uvm_page_physload(atop(avail_start),
-		atop(avail_end), atop(avail_start),
-		atop(avail_end), VM_FREELIST_DEFAULT);
-#endif	/* !XEN */
+	uvm_page_physload(atop(avail_start), atop(avail_end),
+	    atop(avail_start), atop(avail_end), VM_FREELIST_DEFAULT);
+#endif
 
 	init_x86_64_msgbuf();
 

Index: src/sys/arch/i386/i386/machdep.c
diff -u src/sys/arch/i386/i386/machdep.c:1.769 src/sys/arch/i386/i386/machdep.c:1.770
--- src/sys/arch/i386/i386/machdep.c:1.769	Sat Dec 17 14:27:53 2016
+++ src/sys/arch/i386/i386/machdep.c	Tue Dec 20 14:03:15 2016
@@ -1,4 +1,4 @@
-/*	$NetBSD: machdep.c,v 1.769 2016/12/17 14:27:53 maxv Exp $	*/
+/*	$NetBSD: machdep.c,v 1.770 2016/12/20 14:03:15 maxv Exp $	*/
 
 /*-
  * Copyright (c) 1996, 1997, 1998, 2000, 2004, 2006, 2008, 2009
@@ -67,7 +67,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.769 2016/12/17 14:27:53 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.770 2016/12/20 14:03:15 maxv Exp $");
 
 #include "opt_beep.h"
 #include "opt_compat_ibcs2.h"
@@ -254,6 +254,7 @@ vaddr_t pentium_idt_vaddr;
 
 struct vm_map *phys_map = NULL;
 
+extern paddr_t lowmem_rsvd;
 extern paddr_t avail_start, avail_end;
 #ifdef XEN
 extern paddr_t pmap_pa_start, pmap_pa_end;
@@ -1199,6 +1200,8 @@ init386(paddr_t first_avail)
 	 */
 	uvmexp.ncolors = 2;
 
+	avail_start = first_avail;
+
 #ifndef XEN
 	/*
 	 * Low memory reservations:
@@ -1209,7 +1212,7 @@ init386(paddr_t first_avail)
 	 * Page 4:	Temporary page table for 0MB-4MB
 	 * Page 5:	Temporary page directory
 	 */
-	avail_start = 6 * PAGE_SIZE;
+	lowmem_rsvd = 6 * PAGE_SIZE;
 #else /* !XEN */
 	/* Parse Xen command line (replace bootinfo) */
 	xen_parse_cmdline(XEN_PARSE_BOOTFLAGS, NULL);
@@ -1219,8 +1222,6 @@ init386(paddr_t first_avail)
 	gdt = (void *)xen_dummy_page;
 
 	/* Determine physical address space */
-	first_avail = round_page(first_avail);
-	avail_start = first_avail;
 	avail_end = ctob((paddr_t)xen_start_info.nr_pages);
 	pmap_pa_start = (KERNTEXTOFF - KERNBASE);
 	pmap_pa_end = pmap_pa_start + ctob((paddr_t)xen_start_info.nr_pages);
@@ -1261,7 +1262,7 @@ init386(paddr_t first_avail)
 	init_x86_clusters();
 
 	/* Internalize the physical pages into the VM system. */
-	init_x86_vm(first_avail);
+	init_x86_vm(avail_start);
 #else /* !XEN */
 	XENPRINTK(("load the memory cluster 0x%" PRIx64 " (%" PRId64 ") - "
 	    "0x%" PRIx64 " (%" PRId64 ")\n",

Index: src/sys/arch/x86/x86/pmap.c
diff -u src/sys/arch/x86/x86/pmap.c:1.233 src/sys/arch/x86/x86/pmap.c:1.234
--- src/sys/arch/x86/x86/pmap.c:1.233	Sat Dec 17 13:43:33 2016
+++ src/sys/arch/x86/x86/pmap.c	Tue Dec 20 14:03:15 2016
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap.c,v 1.233 2016/12/17 13:43:33 maxv Exp $	*/
+/*	$NetBSD: pmap.c,v 1.234 2016/12/20 14:03:15 maxv Exp $	*/
 
 /*-
  * Copyright (c) 2008, 2010, 2016 The NetBSD Foundation, Inc.
@@ -171,7 +171,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.233 2016/12/17 13:43:33 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.234 2016/12/20 14:03:15 maxv Exp $");
 
 #include "opt_user_ldt.h"
 #include "opt_lockdebug.h"
@@ -393,6 +393,7 @@ int pmap_largepages __read_mostly = 0;
  * (shared with machdep.c) describe the physical address space
  * of this machine.
  */
+paddr_t lowmem_rsvd __read_mostly;
 paddr_t avail_start __read_mostly; /* PA of first available physical page */
 paddr_t avail_end __read_mostly; /* PA of last available physical page */
 

Index: src/sys/arch/x86/x86/x86_machdep.c
diff -u src/sys/arch/x86/x86/x86_machdep.c:1.78 src/sys/arch/x86/x86/x86_machdep.c:1.79
--- src/sys/arch/x86/x86/x86_machdep.c:1.78	Tue Dec 20 12:48:30 2016
+++ src/sys/arch/x86/x86/x86_machdep.c	Tue Dec 20 14:03:15 2016
@@ -1,4 +1,4 @@
-/*	$NetBSD: x86_machdep.c,v 1.78 2016/12/20 12:48:30 maxv Exp $	*/
+/*	$NetBSD: x86_machdep.c,v 1.79 2016/12/20 14:03:15 maxv Exp $	*/
 
 /*-
  * Copyright (c) 2002, 2006, 2007 YAMAMOTO Takashi,
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: x86_machdep.c,v 1.78 2016/12/20 12:48:30 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: x86_machdep.c,v 1.79 2016/12/20 14:03:15 maxv Exp $");
 
 #include "opt_modular.h"
 #include "opt_physmem.h"
@@ -482,7 +482,7 @@ static struct {
 	{ VM_FREELIST_FIRST16,	16 * 1024 * 1024 },
 };
 
-extern paddr_t avail_start, avail_end;
+extern paddr_t lowmem_rsvd, avail_end;
 
 int
 x86_select_freelist(uint64_t maxaddr)
@@ -833,7 +833,7 @@ init_x86_clusters(void)
 
 /*
  * init_x86_vm: initialize the VM system on x86. We basically internalize as
- * many physical pages as we can, starting at avail_start, but we don't
+ * many physical pages as we can, starting at lowmem_rsvd, but we don't
  * internalize the kernel physical pages (from pa_kstart to pa_kend).
  */
 int
@@ -865,11 +865,11 @@ init_x86_vm(paddr_t pa_kend)
 		seg_end1 = 0;
 
 		/* Skip memory before our available starting point. */
-		if (seg_end <= avail_start)
+		if (seg_end <= lowmem_rsvd)
 			continue;
 
-		if (seg_start <= avail_start && avail_start < seg_end) {
-			seg_start = avail_start;
+		if (seg_start <= lowmem_rsvd && lowmem_rsvd < seg_end) {
+			seg_start = lowmem_rsvd;
 			if (seg_start == seg_end)
 				continue;
 		}

Reply via email to