tech@, The below diff tweaks how vmd and vmm define memory ranges (adding a "type" attribute) so we can properly build an e820 memory map to hand to things like SeaBIOS or the OpenBSD ramdisk kernel (when direct booting bsd.rd).
Why do it? We've been carrying a few patches to SeaBIOS in the ports tree to hack around how vmd articulates some memory range details. By finally implementing a proper bios memory map table we can drop some of those patches. (Diff to ports@ coming shortly.) Bonus is it cleans up how we were hacking a bios memory map for direct booting ramdisk kernels. Note: the below diff *will* work with the current SeaBIOS (vmm-firmware), so you do *not* need to build the port. You will, however, need to: - build, install, & reboot into a new kernel - make sure you update /usr/include/amd64/vmmvar.h with a copy of or symlink to sys/arch/amd64/include/vmmvar.h - rebuild & install vmctl - rebuild & install vmd This should *not* result in any behavioral changes of current vmd guests. If you notice any, especially guests failing to start, please rebuild a kernel with VMM_DEBUG to help diagnose the regression. -dv diff refs/heads/master refs/heads/vmd-e820 commit - a96642fb40af450c6576e205fab247cdbce0b5ed commit + f3cb01998127d200e95ff9984a7503eb16c2a8d8 blob - 3f7e0ce405ae3c6b0b4a787de341839886f97436 blob + f2a464217838d3f0a50e4131b5b074b315e490fb --- sys/arch/amd64/amd64/vmm.c +++ sys/arch/amd64/amd64/vmm.c @@ -1643,21 +1643,27 @@ vm_create_check_mem_ranges(struct vm_create_params *vc const paddr_t maxgpa = VMM_MAX_VM_MEM_SIZE; if (vcp->vcp_nmemranges == 0 || - vcp->vcp_nmemranges > VMM_MAX_MEM_RANGES) + vcp->vcp_nmemranges > VMM_MAX_MEM_RANGES) { + DPRINTF("invalid number of guest memory ranges\n"); return (0); + } for (i = 0; i < vcp->vcp_nmemranges; i++) { vmr = &vcp->vcp_memranges[i]; /* Only page-aligned addresses and sizes are permitted */ if ((vmr->vmr_gpa & PAGE_MASK) || (vmr->vmr_va & PAGE_MASK) || - (vmr->vmr_size & PAGE_MASK) || vmr->vmr_size == 0) + (vmr->vmr_size & PAGE_MASK) || vmr->vmr_size == 0) { + DPRINTF("memory range %zu is not page aligned\n", i); return (0); + } /* Make sure that VMM_MAX_VM_MEM_SIZE is not exceeded */ if (vmr->vmr_gpa >= maxgpa || - vmr->vmr_size 
> maxgpa - vmr->vmr_gpa) + vmr->vmr_size > maxgpa - vmr->vmr_gpa) { + DPRINTF("exceeded max memory size\n"); return (0); + } /* * Make sure that all virtual addresses are within the address @@ -1667,39 +1673,55 @@ vm_create_check_mem_ranges(struct vm_create_params *vc */ if (vmr->vmr_va < VM_MIN_ADDRESS || vmr->vmr_va >= VM_MAXUSER_ADDRESS || - vmr->vmr_size >= VM_MAXUSER_ADDRESS - vmr->vmr_va) + vmr->vmr_size >= VM_MAXUSER_ADDRESS - vmr->vmr_va) { + DPRINTF("guest va not within range or wraps\n"); return (0); + } /* * Specifying ranges within the PCI MMIO space is forbidden. * Disallow ranges that start inside the MMIO space: * [VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END] */ - if (vmr->vmr_gpa >= VMM_PCI_MMIO_BAR_BASE && - vmr->vmr_gpa <= VMM_PCI_MMIO_BAR_END) + if (vmr->vmr_type == VM_MEM_RAM && + vmr->vmr_gpa >= VMM_PCI_MMIO_BAR_BASE && + vmr->vmr_gpa <= VMM_PCI_MMIO_BAR_END) { + DPRINTF("guest RAM range %zu cannot being in mmio range" + " (gpa=0x%lx)\n", i, vmr->vmr_gpa); return (0); + } /* * ... and disallow ranges that end inside the MMIO space: * (VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END] */ - if (vmr->vmr_gpa + vmr->vmr_size > VMM_PCI_MMIO_BAR_BASE && - vmr->vmr_gpa + vmr->vmr_size <= VMM_PCI_MMIO_BAR_END) + if (vmr->vmr_type == VM_MEM_RAM && + vmr->vmr_gpa + vmr->vmr_size > VMM_PCI_MMIO_BAR_BASE && + vmr->vmr_gpa + vmr->vmr_size <= VMM_PCI_MMIO_BAR_END) { + DPRINTF("guest RAM range %zu cannot end in mmio range" + " (gpa=0x%lx, sz=0x%lx)\n", i, vmr->vmr_gpa, + vmr->vmr_size); return (0); + } /* * Make sure that guest physical memory ranges do not overlap * and that they are ascending. 
*/ - if (i > 0 && pvmr->vmr_gpa + pvmr->vmr_size > vmr->vmr_gpa) + if (i > 0 && pvmr->vmr_gpa + pvmr->vmr_size > vmr->vmr_gpa) { + DPRINTF("guest range %zu overlaps or !ascending\n", i); return (0); + } memsize += vmr->vmr_size; pvmr = vmr; } - if (memsize % (1024 * 1024) != 0) + if (memsize % (1024 * 1024) != 0) { + DPRINTF("memory size not a multiple of 1MB\n"); return (0); + } + return (memsize); } blob - 94feca154717c1e3016990ad260036cd79e29b65 blob + 2c57f10b9340e8a779f50bee18d235a299721571 --- sys/arch/amd64/include/vmmvar.h +++ sys/arch/amd64/include/vmmvar.h @@ -451,6 +451,9 @@ struct vm_mem_range { paddr_t vmr_gpa; vaddr_t vmr_va; size_t vmr_size; + int vmr_type; +#define VM_MEM_RAM 0 +#define VM_MEM_RESERVED 1 }; /* blob - 4ec036912cafa154f4eb24ce757f0cb6e4c6bf4a blob + eb0bea236ed0d6c4d68f6699eb6720ef8fca296c --- usr.sbin/vmd/fw_cfg.c +++ usr.sbin/vmd/fw_cfg.c @@ -16,6 +16,7 @@ */ #include <sys/types.h> #include <sys/uio.h> +#include <machine/biosvar.h> /* bios_memmap_t */ #include <machine/vmmvar.h> #include <stdlib.h> @@ -63,6 +64,8 @@ static int fw_cfg_select_file(uint16_t); static uint64_t fw_cfg_dma_addr; +static bios_memmap_t e820[VMM_MAX_MEM_RANGES]; + static int fw_cfg_select_file(uint16_t); static void fw_cfg_file_dir(void); @@ -71,7 +74,27 @@ fw_cfg_init(struct vmop_create_params *vmc) { const char *bootorder = NULL; unsigned int sd = 0; + size_t i, e820_len = 0; + /* Define e820 memory ranges. 
*/ + memset(&e820, 0, sizeof(e820)); + for (i = 0; i < vmc->vmc_params.vcp_nmemranges; i++) { + struct vm_mem_range *range = &vmc->vmc_params.vcp_memranges[i]; + bios_memmap_t *entry = &e820[i]; + + entry->addr = range->vmr_gpa; + entry->size = range->vmr_size; + if (range->vmr_type == VM_MEM_RAM) + entry->type = BIOS_MAP_FREE; + else if (range->vmr_type == VM_MEM_RESERVED) + entry->type = BIOS_MAP_RES; + else + fatalx("undefined memory type %d", entry->type); + + e820_len += sizeof(bios_memmap_t); + } + fw_cfg_add_file("etc/e820", &e820, e820_len); + /* do not double print chars on serial port */ fw_cfg_add_file("etc/screen-and-debug", &sd, sizeof(sd)); blob - 651719542d28ce44bccb0487867ece7e72686606 blob + b7f79eb9e140073f75563a6dcb5fdad3cb2b2d22 --- usr.sbin/vmd/loadfile_elf.c +++ usr.sbin/vmd/loadfile_elf.c @@ -334,38 +334,26 @@ create_bios_memmap(struct vm_create_params *vcp, bios_ static size_t create_bios_memmap(struct vm_create_params *vcp, bios_memmap_t *memmap) { - size_t i, n = 0, sz; - paddr_t gpa; + size_t i, n = 0; struct vm_mem_range *vmr; - for (i = 0; i < vcp->vcp_nmemranges; i++) { + for (i = 0; i < vcp->vcp_nmemranges; i++, n++) { vmr = &vcp->vcp_memranges[i]; - gpa = vmr->vmr_gpa; - sz = vmr->vmr_size; - - /* - * Make sure that we do not mark the ROM/video RAM area in the - * low memory as physcal memory available to the kernel. 
- */ - if (gpa < 0x100000 && gpa + sz > LOWMEM_KB * 1024) { - if (gpa >= LOWMEM_KB * 1024) - sz = 0; - else - sz = LOWMEM_KB * 1024 - gpa; - } - - if (sz != 0) { - memmap[n].addr = gpa; - memmap[n].size = sz; - memmap[n].type = 0x1; /* Type 1 : Normal memory */ - n++; - } + memmap[n].addr = vmr->vmr_gpa; + memmap[n].size = vmr->vmr_size; + if (vmr->vmr_type == VM_MEM_RAM) + memmap[n].type = BIOS_MAP_FREE; + else if (vmr->vmr_type == VM_MEM_RESERVED) + memmap[n].type = BIOS_MAP_RES; + else + fatalx("%s: invalid vm memory range type %d\n", + __func__, vmr->vmr_type); } /* Null mem map entry to denote the end of the ranges */ memmap[n].addr = 0x0; memmap[n].size = 0x0; - memmap[n].type = 0x0; + memmap[n].type = BIOS_MAP_END; n++; return (n); blob - f1d9b97741c11f8cc4faa3f79658cd87135d2b29 blob + 7a1b3bb39cfd4651b076bf5c5e74012bdd11754e --- usr.sbin/vmd/vm.c +++ usr.sbin/vmd/vm.c @@ -899,6 +899,7 @@ create_memory_map(struct vm_create_params *vcp) len = LOWMEM_KB * 1024; vcp->vcp_memranges[0].vmr_gpa = 0x0; vcp->vcp_memranges[0].vmr_size = len; + vcp->vcp_memranges[0].vmr_type = VM_MEM_RAM; mem_bytes -= len; /* @@ -913,12 +914,14 @@ create_memory_map(struct vm_create_params *vcp) len = MB(1) - (LOWMEM_KB * 1024); vcp->vcp_memranges[1].vmr_gpa = LOWMEM_KB * 1024; vcp->vcp_memranges[1].vmr_size = len; + vcp->vcp_memranges[1].vmr_type = VM_MEM_RESERVED; mem_bytes -= len; /* If we have less than 2MB remaining, still create a 2nd BIOS area. 
*/ if (mem_bytes <= MB(2)) { vcp->vcp_memranges[2].vmr_gpa = VMM_PCI_MMIO_BAR_END; vcp->vcp_memranges[2].vmr_size = MB(2); + vcp->vcp_memranges[2].vmr_type = VM_MEM_RESERVED; vcp->vcp_nmemranges = 3; return; } @@ -939,18 +942,27 @@ create_memory_map(struct vm_create_params *vcp) /* Third memory region: area above 1MB to MMIO region */ vcp->vcp_memranges[2].vmr_gpa = MB(1); vcp->vcp_memranges[2].vmr_size = above_1m; + vcp->vcp_memranges[2].vmr_type = VM_MEM_RAM; - /* Fourth region: 2nd copy of BIOS above MMIO ending at 4GB */ - vcp->vcp_memranges[3].vmr_gpa = VMM_PCI_MMIO_BAR_END + 1; - vcp->vcp_memranges[3].vmr_size = MB(2); + /* Fourth region: PCI MMIO range */ + vcp->vcp_memranges[3].vmr_gpa = VMM_PCI_MMIO_BAR_BASE; + vcp->vcp_memranges[3].vmr_size = VMM_PCI_MMIO_BAR_END - + VMM_PCI_MMIO_BAR_BASE + 1; + vcp->vcp_memranges[3].vmr_type = VM_MEM_RESERVED; - /* Fifth region: any remainder above 4GB */ + /* Fifth region: 2nd copy of BIOS above MMIO ending at 4GB */ + vcp->vcp_memranges[4].vmr_gpa = VMM_PCI_MMIO_BAR_END + 1; + vcp->vcp_memranges[4].vmr_size = MB(2); + vcp->vcp_memranges[4].vmr_type = VM_MEM_RESERVED; + + /* Sixth region: any remainder above 4GB */ if (above_4g > 0) { - vcp->vcp_memranges[4].vmr_gpa = GB(4); - vcp->vcp_memranges[4].vmr_size = above_4g; + vcp->vcp_memranges[5].vmr_gpa = GB(4); + vcp->vcp_memranges[5].vmr_size = above_4g; + vcp->vcp_memranges[5].vmr_type = VM_MEM_RAM; + vcp->vcp_nmemranges = 6; + } else vcp->vcp_nmemranges = 5; - } else - vcp->vcp_nmemranges = 4; } /*