Hi Dave,
Great stuff!!
Everything is patched, build and booted.
What is the best way to test this?
Start a bunch of VMs with bsd.rd? Does this still need to be a
decompressed bsd.rd?
Mischa
On 2022-12-10 23:51, Dave Voutila wrote:
tech@,
The below diff tweaks how vmd and vmm define memory ranges (adding a
"type" attribute) so we can properly build an e820 memory map to hand
to
things like SeaBIOS or the OpenBSD ramdisk kernel (when direct booting
bsd.rd).
Why do it? We've been carrying a few patches to SeaBIOS in the ports
tree to hack around how vmd articulates some memory range details. By
finally implementing a proper bios memory map table we can drop some of
those patches. (Diff to ports@ coming shortly.)
Bonus is it cleans up how we were hacking a bios memory map for direct
booting ramdisk kernels.
Note: the below diff *will* work with the current SeaBIOS
(vmm-firmware), so you do *not* need to build the port.
You will, however, need to:
- build, install, & reboot into a new kernel
- make sure you update /usr/include/amd64/vmmvar.h with a copy of
symlink to sys/arch/amd64/include/vmmvar.h
- rebuild & install vmctl
- rebuild & install vmd
This should *not* result in any behavioral changes of current vmd
guests. If you notice any, especially guests failing to start, please
rebuild a kernel with VMM_DEBUG to help diagnose the regression.
-dv
diff refs/heads/master refs/heads/vmd-e820
commit - a96642fb40af450c6576e205fab247cdbce0b5ed
commit + f3cb01998127d200e95ff9984a7503eb16c2a8d8
blob - 3f7e0ce405ae3c6b0b4a787de341839886f97436
blob + f2a464217838d3f0a50e4131b5b074b315e490fb
--- sys/arch/amd64/amd64/vmm.c
+++ sys/arch/amd64/amd64/vmm.c
@@ -1643,21 +1643,27 @@ vm_create_check_mem_ranges(struct
vm_create_params *vc
const paddr_t maxgpa = VMM_MAX_VM_MEM_SIZE;
if (vcp->vcp_nmemranges == 0 ||
- vcp->vcp_nmemranges > VMM_MAX_MEM_RANGES)
+ vcp->vcp_nmemranges > VMM_MAX_MEM_RANGES) {
+ DPRINTF("invalid number of guest memory ranges\n");
return (0);
+ }
for (i = 0; i < vcp->vcp_nmemranges; i++) {
vmr = &vcp->vcp_memranges[i];
/* Only page-aligned addresses and sizes are permitted */
if ((vmr->vmr_gpa & PAGE_MASK) || (vmr->vmr_va & PAGE_MASK) ||
- (vmr->vmr_size & PAGE_MASK) || vmr->vmr_size == 0)
+ (vmr->vmr_size & PAGE_MASK) || vmr->vmr_size == 0) {
+ DPRINTF("memory range %zu is not page aligned\n", i);
return (0);
+ }
/* Make sure that VMM_MAX_VM_MEM_SIZE is not exceeded */
if (vmr->vmr_gpa >= maxgpa ||
- vmr->vmr_size > maxgpa - vmr->vmr_gpa)
+ vmr->vmr_size > maxgpa - vmr->vmr_gpa) {
+ DPRINTF("exceeded max memory size\n");
return (0);
+ }
/*
* Make sure that all virtual addresses are within the address
@@ -1667,39 +1673,55 @@ vm_create_check_mem_ranges(struct
vm_create_params *vc
*/
if (vmr->vmr_va < VM_MIN_ADDRESS ||
vmr->vmr_va >= VM_MAXUSER_ADDRESS ||
- vmr->vmr_size >= VM_MAXUSER_ADDRESS - vmr->vmr_va)
+ vmr->vmr_size >= VM_MAXUSER_ADDRESS - vmr->vmr_va) {
+ DPRINTF("guest va not within range or wraps\n");
return (0);
+ }
/*
* Specifying ranges within the PCI MMIO space is forbidden.
* Disallow ranges that start inside the MMIO space:
* [VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END]
*/
- if (vmr->vmr_gpa >= VMM_PCI_MMIO_BAR_BASE &&
- vmr->vmr_gpa <= VMM_PCI_MMIO_BAR_END)
+ if (vmr->vmr_type == VM_MEM_RAM &&
+ vmr->vmr_gpa >= VMM_PCI_MMIO_BAR_BASE &&
+ vmr->vmr_gpa <= VMM_PCI_MMIO_BAR_END) {
+ DPRINTF("guest RAM range %zu cannot being in mmio range"
+ " (gpa=0x%lx)\n", i, vmr->vmr_gpa);
return (0);
+ }
/*
* ... and disallow ranges that end inside the MMIO space:
* (VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END]
*/
- if (vmr->vmr_gpa + vmr->vmr_size > VMM_PCI_MMIO_BAR_BASE &&
- vmr->vmr_gpa + vmr->vmr_size <= VMM_PCI_MMIO_BAR_END)
+ if (vmr->vmr_type == VM_MEM_RAM &&
+ vmr->vmr_gpa + vmr->vmr_size > VMM_PCI_MMIO_BAR_BASE &&
+ vmr->vmr_gpa + vmr->vmr_size <= VMM_PCI_MMIO_BAR_END) {
+ DPRINTF("guest RAM range %zu cannot end in mmio range"
+ " (gpa=0x%lx, sz=0x%lx)\n", i, vmr->vmr_gpa,
+ vmr->vmr_size);
return (0);
+ }
/*
* Make sure that guest physical memory ranges do not overlap
* and that they are ascending.
*/
- if (i > 0 && pvmr->vmr_gpa + pvmr->vmr_size > vmr->vmr_gpa)
+ if (i > 0 && pvmr->vmr_gpa + pvmr->vmr_size > vmr->vmr_gpa) {
+ DPRINTF("guest range %zu overlaps or !ascending\n", i);
return (0);
+ }
memsize += vmr->vmr_size;
pvmr = vmr;
}
- if (memsize % (1024 * 1024) != 0)
+ if (memsize % (1024 * 1024) != 0) {
+ DPRINTF("memory size not a multiple of 1MB\n");
return (0);
+ }
+
return (memsize);
}
blob - 94feca154717c1e3016990ad260036cd79e29b65
blob + 2c57f10b9340e8a779f50bee18d235a299721571
--- sys/arch/amd64/include/vmmvar.h
+++ sys/arch/amd64/include/vmmvar.h
@@ -451,6 +451,9 @@ struct vm_mem_range {
paddr_t vmr_gpa;
vaddr_t vmr_va;
size_t vmr_size;
+ int vmr_type;
+#define VM_MEM_RAM 0
+#define VM_MEM_RESERVED 1
};
/*
blob - 4ec036912cafa154f4eb24ce757f0cb6e4c6bf4a
blob + eb0bea236ed0d6c4d68f6699eb6720ef8fca296c
--- usr.sbin/vmd/fw_cfg.c
+++ usr.sbin/vmd/fw_cfg.c
@@ -16,6 +16,7 @@
*/
#include <sys/types.h>
#include <sys/uio.h>
+#include <machine/biosvar.h> /* bios_memmap_t */
#include <machine/vmmvar.h>
#include <stdlib.h>
@@ -63,6 +64,8 @@ static int fw_cfg_select_file(uint16_t);
static uint64_t fw_cfg_dma_addr;
+static bios_memmap_t e820[VMM_MAX_MEM_RANGES];
+
static int fw_cfg_select_file(uint16_t);
static void fw_cfg_file_dir(void);
@@ -71,7 +74,27 @@ fw_cfg_init(struct vmop_create_params *vmc)
{
const char *bootorder = NULL;
unsigned int sd = 0;
+ size_t i, e820_len = 0;
+ /* Define e820 memory ranges. */
+ memset(&e820, 0, sizeof(e820));
+ for (i = 0; i < vmc->vmc_params.vcp_nmemranges; i++) {
+ struct vm_mem_range *range = &vmc->vmc_params.vcp_memranges[i];
+ bios_memmap_t *entry = &e820[i];
+
+ entry->addr = range->vmr_gpa;
+ entry->size = range->vmr_size;
+ if (range->vmr_type == VM_MEM_RAM)
+ entry->type = BIOS_MAP_FREE;
+ else if (range->vmr_type == VM_MEM_RESERVED)
+ entry->type = BIOS_MAP_RES;
+ else
+ fatalx("undefined memory type %d", entry->type);
+
+ e820_len += sizeof(bios_memmap_t);
+ }
+ fw_cfg_add_file("etc/e820", &e820, e820_len);
+
/* do not double print chars on serial port */
fw_cfg_add_file("etc/screen-and-debug", &sd, sizeof(sd));
blob - 651719542d28ce44bccb0487867ece7e72686606
blob + b7f79eb9e140073f75563a6dcb5fdad3cb2b2d22
--- usr.sbin/vmd/loadfile_elf.c
+++ usr.sbin/vmd/loadfile_elf.c
@@ -334,38 +334,26 @@ create_bios_memmap(struct vm_create_params *vcp,
bios_
static size_t
create_bios_memmap(struct vm_create_params *vcp, bios_memmap_t
*memmap)
{
- size_t i, n = 0, sz;
- paddr_t gpa;
+ size_t i, n = 0;
struct vm_mem_range *vmr;
- for (i = 0; i < vcp->vcp_nmemranges; i++) {
+ for (i = 0; i < vcp->vcp_nmemranges; i++, n++) {
vmr = &vcp->vcp_memranges[i];
- gpa = vmr->vmr_gpa;
- sz = vmr->vmr_size;
-
- /*
- * Make sure that we do not mark the ROM/video RAM area in the
- * low memory as physcal memory available to the kernel.
- */
- if (gpa < 0x100000 && gpa + sz > LOWMEM_KB * 1024) {
- if (gpa >= LOWMEM_KB * 1024)
- sz = 0;
- else
- sz = LOWMEM_KB * 1024 - gpa;
- }
-
- if (sz != 0) {
- memmap[n].addr = gpa;
- memmap[n].size = sz;
- memmap[n].type = 0x1; /* Type 1 : Normal memory */
- n++;
- }
+ memmap[n].addr = vmr->vmr_gpa;
+ memmap[n].size = vmr->vmr_size;
+ if (vmr->vmr_type == VM_MEM_RAM)
+ memmap[n].type = BIOS_MAP_FREE;
+ else if (vmr->vmr_type == VM_MEM_RESERVED)
+ memmap[n].type = BIOS_MAP_RES;
+ else
+ fatalx("%s: invalid vm memory range type %d\n",
+ __func__, vmr->vmr_type);
}
/* Null mem map entry to denote the end of the ranges */
memmap[n].addr = 0x0;
memmap[n].size = 0x0;
- memmap[n].type = 0x0;
+ memmap[n].type = BIOS_MAP_END;
n++;
return (n);
blob - f1d9b97741c11f8cc4faa3f79658cd87135d2b29
blob + 7a1b3bb39cfd4651b076bf5c5e74012bdd11754e
--- usr.sbin/vmd/vm.c
+++ usr.sbin/vmd/vm.c
@@ -899,6 +899,7 @@ create_memory_map(struct vm_create_params *vcp)
len = LOWMEM_KB * 1024;
vcp->vcp_memranges[0].vmr_gpa = 0x0;
vcp->vcp_memranges[0].vmr_size = len;
+ vcp->vcp_memranges[0].vmr_type = VM_MEM_RAM;
mem_bytes -= len;
/*
@@ -913,12 +914,14 @@ create_memory_map(struct vm_create_params *vcp)
len = MB(1) - (LOWMEM_KB * 1024);
vcp->vcp_memranges[1].vmr_gpa = LOWMEM_KB * 1024;
vcp->vcp_memranges[1].vmr_size = len;
+ vcp->vcp_memranges[1].vmr_type = VM_MEM_RESERVED;
mem_bytes -= len;
/* If we have less than 2MB remaining, still create a 2nd BIOS area.
*/
if (mem_bytes <= MB(2)) {
vcp->vcp_memranges[2].vmr_gpa = VMM_PCI_MMIO_BAR_END;
vcp->vcp_memranges[2].vmr_size = MB(2);
+ vcp->vcp_memranges[2].vmr_type = VM_MEM_RESERVED;
vcp->vcp_nmemranges = 3;
return;
}
@@ -939,18 +942,27 @@ create_memory_map(struct vm_create_params *vcp)
/* Third memory region: area above 1MB to MMIO region */
vcp->vcp_memranges[2].vmr_gpa = MB(1);
vcp->vcp_memranges[2].vmr_size = above_1m;
+ vcp->vcp_memranges[2].vmr_type = VM_MEM_RAM;
- /* Fourth region: 2nd copy of BIOS above MMIO ending at 4GB */
- vcp->vcp_memranges[3].vmr_gpa = VMM_PCI_MMIO_BAR_END + 1;
- vcp->vcp_memranges[3].vmr_size = MB(2);
+ /* Fourth region: PCI MMIO range */
+ vcp->vcp_memranges[3].vmr_gpa = VMM_PCI_MMIO_BAR_BASE;
+ vcp->vcp_memranges[3].vmr_size = VMM_PCI_MMIO_BAR_END -
+ VMM_PCI_MMIO_BAR_BASE + 1;
+ vcp->vcp_memranges[3].vmr_type = VM_MEM_RESERVED;
- /* Fifth region: any remainder above 4GB */
+ /* Fifth region: 2nd copy of BIOS above MMIO ending at 4GB */
+ vcp->vcp_memranges[4].vmr_gpa = VMM_PCI_MMIO_BAR_END + 1;
+ vcp->vcp_memranges[4].vmr_size = MB(2);
+ vcp->vcp_memranges[4].vmr_type = VM_MEM_RESERVED;
+
+ /* Sixth region: any remainder above 4GB */
if (above_4g > 0) {
- vcp->vcp_memranges[4].vmr_gpa = GB(4);
- vcp->vcp_memranges[4].vmr_size = above_4g;
+ vcp->vcp_memranges[5].vmr_gpa = GB(4);
+ vcp->vcp_memranges[5].vmr_size = above_4g;
+ vcp->vcp_memranges[5].vmr_type = VM_MEM_RAM;
+ vcp->vcp_nmemranges = 6;
+ } else
vcp->vcp_nmemranges = 5;
- } else
- vcp->vcp_nmemranges = 4;
}
/*