*bump*... now looking for OKs!
Dave Voutila <d...@sisu.io> writes: > Dave Voutila <d...@sisu.io> writes: > >> tech@, >> >> The below diff tweaks how vmd and vmm define memory ranges (adding a >> "type" attribute) so we can properly build an e820 memory map to hand to >> things like SeaBIOS or the OpenBSD ramdisk kernel (when direct booting >> bsd.rd). >> >> Why do it? We've been carrying a few patches to SeaBIOS in the ports >> tree to hack around how vmd articulates some memory range details. By >> finally implementing a proper bios memory map table we can drop some of >> those patches. (Diff to ports@ coming shortly.) >> >> Bonus is it cleans up how we were hacking a bios memory map for direct >> booting ramdisk kernels. >> >> Note: the below diff *will* work with the current SeaBIOS >> (vmm-firmware), so you do *not* need to build the port. >> >> You will, however, need to: >> - build, install, & reboot into a new kernel >> - make sure you update /usr/include/amd64/vmmvar.h with a copy of or >> symlink to sys/arch/amd64/include/vmmvar.h >> - rebuild & install vmctl >> - rebuild & install vmd >> >> This should *not* result in any behavioral changes of current vmd >> guests. If you notice any, especially guests failing to start, please >> rebuild a kernel with VMM_DEBUG to help diagnose the regression. >> > > Updated diff to fix some accounting issues with guest memory. (vmctl > should report the correct max mem now.) > > As a result, this adds an MMIO range type (previous diff counted that > range towards guest mem, though we don't actually fault in virtual > memory to represent it to the guest). > > This has the added benefit of removing more knowledge from vmm(4) of > what an emulated machine looks like, i.e. why does it care what the pci > mmio range is? vmd(8) is responsible for that. > > I did also remove the "multiple of 1M" requirement for guest > memory. Since I transitioned things to bytes a while ago, no need to > prohibit that. 
> > -dv > > diff refs/heads/master refs/heads/vmd-e820 > commit - 9be741fe9857107e3610acb9a39e2972330b122d > commit + ad422400e2f72c14c73d7f124f8b96d01d4ad4c5 > blob - 3f7e0ce405ae3c6b0b4a787de341839886f97436 > blob + d69293fcd5fd98315181eb0dd77b653601530e9d > --- sys/arch/amd64/amd64/vmm.c > +++ sys/arch/amd64/amd64/vmm.c > @@ -1631,8 +1631,8 @@ vmx_remote_vmclear(struct cpu_info *ci, struct vcpu *v > * The last physical address may not exceed VMM_MAX_VM_MEM_SIZE. > * > * Return Values: > - * The total memory size in MB if the checks were successful > - * 0: One of the memory ranges was invalid, or VMM_MAX_VM_MEM_SIZE was > + * The total memory size in bytes if the checks were successful > + * 0: One of the memory ranges was invalid or VMM_MAX_VM_MEM_SIZE was > * exceeded > */ > size_t > @@ -1643,21 +1643,27 @@ vm_create_check_mem_ranges(struct vm_create_params *vc > const paddr_t maxgpa = VMM_MAX_VM_MEM_SIZE; > > if (vcp->vcp_nmemranges == 0 || > - vcp->vcp_nmemranges > VMM_MAX_MEM_RANGES) > + vcp->vcp_nmemranges > VMM_MAX_MEM_RANGES) { > + DPRINTF("invalid number of guest memory ranges\n"); > return (0); > + } > > for (i = 0; i < vcp->vcp_nmemranges; i++) { > vmr = &vcp->vcp_memranges[i]; > > /* Only page-aligned addresses and sizes are permitted */ > if ((vmr->vmr_gpa & PAGE_MASK) || (vmr->vmr_va & PAGE_MASK) || > - (vmr->vmr_size & PAGE_MASK) || vmr->vmr_size == 0) > + (vmr->vmr_size & PAGE_MASK) || vmr->vmr_size == 0) { > + DPRINTF("memory range %zu is not page aligned\n", i); > return (0); > + } > > /* Make sure that VMM_MAX_VM_MEM_SIZE is not exceeded */ > if (vmr->vmr_gpa >= maxgpa || > - vmr->vmr_size > maxgpa - vmr->vmr_gpa) > + vmr->vmr_size > maxgpa - vmr->vmr_gpa) { > + DPRINTF("exceeded max memory size\n"); > return (0); > + } > > /* > * Make sure that all virtual addresses are within the address > @@ -1667,39 +1673,29 @@ vm_create_check_mem_ranges(struct vm_create_params *vc > */ > if (vmr->vmr_va < VM_MIN_ADDRESS || > vmr->vmr_va >= 
VM_MAXUSER_ADDRESS || > - vmr->vmr_size >= VM_MAXUSER_ADDRESS - vmr->vmr_va) > + vmr->vmr_size >= VM_MAXUSER_ADDRESS - vmr->vmr_va) { > + DPRINTF("guest va not within range or wraps\n"); > return (0); > + } > > /* > - * Specifying ranges within the PCI MMIO space is forbidden. > - * Disallow ranges that start inside the MMIO space: > - * [VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END] > - */ > - if (vmr->vmr_gpa >= VMM_PCI_MMIO_BAR_BASE && > - vmr->vmr_gpa <= VMM_PCI_MMIO_BAR_END) > - return (0); > - > - /* > - * ... and disallow ranges that end inside the MMIO space: > - * (VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END] > - */ > - if (vmr->vmr_gpa + vmr->vmr_size > VMM_PCI_MMIO_BAR_BASE && > - vmr->vmr_gpa + vmr->vmr_size <= VMM_PCI_MMIO_BAR_END) > - return (0); > - > - /* > * Make sure that guest physical memory ranges do not overlap > * and that they are ascending. > */ > - if (i > 0 && pvmr->vmr_gpa + pvmr->vmr_size > vmr->vmr_gpa) > + if (i > 0 && pvmr->vmr_gpa + pvmr->vmr_size > vmr->vmr_gpa) { > + DPRINTF("guest range %zu overlaps or !ascending\n", i); > return (0); > + } > > - memsize += vmr->vmr_size; > + /* > + * No memory is mappable in MMIO ranges, so don't count towards > + * the total guest memory size. > + */ > + if (vmr->vmr_type != VM_MEM_MMIO) > + memsize += vmr->vmr_size; > pvmr = vmr; > } > > - if (memsize % (1024 * 1024) != 0) > - return (0); > return (memsize); > } > > @@ -5633,11 +5629,6 @@ vmm_get_guest_memtype(struct vm *vm, paddr_t gpa) > int i; > struct vm_mem_range *vmr; > > - if (gpa >= VMM_PCI_MMIO_BAR_BASE && gpa <= VMM_PCI_MMIO_BAR_END) { > - DPRINTF("guest mmio access @ 0x%llx\n", (uint64_t)gpa); > - return (VMM_MEM_TYPE_MMIO); > - } > - > /* XXX Use binary search? 
*/ > for (i = 0; i < vm->vm_nmemranges; i++) { > vmr = &vm->vm_memranges[i]; > @@ -5649,8 +5640,11 @@ vmm_get_guest_memtype(struct vm *vm, paddr_t gpa) > if (gpa < vmr->vmr_gpa) > break; > > - if (gpa < vmr->vmr_gpa + vmr->vmr_size) > + if (gpa < vmr->vmr_gpa + vmr->vmr_size) { > + if (vmr->vmr_type == VM_MEM_MMIO) > + return (VMM_MEM_TYPE_MMIO); > return (VMM_MEM_TYPE_REGULAR); > + } > } > > DPRINTF("guest memtype @ 0x%llx unknown\n", (uint64_t)gpa); > blob - 94feca154717c1e3016990ad260036cd79e29b65 > blob + 45582d39a9cf7624216c3335e576979ef64aea32 > --- sys/arch/amd64/include/vmmvar.h > +++ sys/arch/amd64/include/vmmvar.h > @@ -451,6 +451,10 @@ struct vm_mem_range { > paddr_t vmr_gpa; > vaddr_t vmr_va; > size_t vmr_size; > + int vmr_type; > +#define VM_MEM_RAM 0 /* Presented as usable system memory. */ > +#define VM_MEM_RESERVED 1 /* Reserved for BIOS, etc. */ > +#define VM_MEM_MMIO 2 /* Special region for device mmio. */ > }; > > /* > blob - 4ec036912cafa154f4eb24ce757f0cb6e4c6bf4a > blob + 9449adb74bffe88e56886f856d82f58fe9866cbb > --- usr.sbin/vmd/fw_cfg.c > +++ usr.sbin/vmd/fw_cfg.c > @@ -16,6 +16,7 @@ > */ > #include <sys/types.h> > #include <sys/uio.h> > +#include <machine/biosvar.h> /* bios_memmap_t */ > #include <machine/vmmvar.h> > > #include <stdlib.h> > @@ -63,6 +64,8 @@ static int fw_cfg_select_file(uint16_t); > > static uint64_t fw_cfg_dma_addr; > > +static bios_memmap_t e820[VMM_MAX_MEM_RANGES]; > + > static int fw_cfg_select_file(uint16_t); > static void fw_cfg_file_dir(void); > > @@ -71,7 +74,23 @@ fw_cfg_init(struct vmop_create_params *vmc) > { > const char *bootorder = NULL; > unsigned int sd = 0; > + size_t i, e820_len = 0; > > + /* Define e820 memory ranges. 
*/ > + memset(&e820, 0, sizeof(e820)); > + for (i = 0; i < vmc->vmc_params.vcp_nmemranges; i++) { > + struct vm_mem_range *range = &vmc->vmc_params.vcp_memranges[i]; > + bios_memmap_t *entry = &e820[i]; > + entry->addr = range->vmr_gpa; > + entry->size = range->vmr_size; > + if (range->vmr_type == VM_MEM_RAM) > + entry->type = BIOS_MAP_FREE; > + else > + entry->type = BIOS_MAP_RES; > + e820_len += sizeof(bios_memmap_t); > + } > + fw_cfg_add_file("etc/e820", &e820, e820_len); > + > /* do not double print chars on serial port */ > fw_cfg_add_file("etc/screen-and-debug", &sd, sizeof(sd)); > > blob - 651719542d28ce44bccb0487867ece7e72686606 > blob + 565725e1385c98f473f35ae257f7b061167fb076 > --- usr.sbin/vmd/loadfile_elf.c > +++ usr.sbin/vmd/loadfile_elf.c > @@ -334,38 +334,23 @@ create_bios_memmap(struct vm_create_params *vcp, bios_ > static size_t > create_bios_memmap(struct vm_create_params *vcp, bios_memmap_t *memmap) > { > - size_t i, n = 0, sz; > - paddr_t gpa; > + size_t i, n = 0; > struct vm_mem_range *vmr; > > - for (i = 0; i < vcp->vcp_nmemranges; i++) { > + for (i = 0; i < vcp->vcp_nmemranges; i++, n++) { > vmr = &vcp->vcp_memranges[i]; > - gpa = vmr->vmr_gpa; > - sz = vmr->vmr_size; > - > - /* > - * Make sure that we do not mark the ROM/video RAM area in the > - * low memory as physcal memory available to the kernel. 
> - */ > - if (gpa < 0x100000 && gpa + sz > LOWMEM_KB * 1024) { > - if (gpa >= LOWMEM_KB * 1024) > - sz = 0; > - else > - sz = LOWMEM_KB * 1024 - gpa; > - } > - > - if (sz != 0) { > - memmap[n].addr = gpa; > - memmap[n].size = sz; > - memmap[n].type = 0x1; /* Type 1 : Normal memory */ > - n++; > - } > + memmap[n].addr = vmr->vmr_gpa; > + memmap[n].size = vmr->vmr_size; > + if (vmr->vmr_type == VM_MEM_RAM) > + memmap[n].type = BIOS_MAP_FREE; > + else > + memmap[n].type = BIOS_MAP_RES; > } > > /* Null mem map entry to denote the end of the ranges */ > memmap[n].addr = 0x0; > memmap[n].size = 0x0; > - memmap[n].type = 0x0; > + memmap[n].type = BIOS_MAP_END; > n++; > > return (n); > blob - f1d9b97741c11f8cc4faa3f79658cd87135d2b29 > blob + 4dfa3f920000d66c7ac53390009c7e280961c4f8 > --- usr.sbin/vmd/vm.c > +++ usr.sbin/vmd/vm.c > @@ -899,6 +899,7 @@ create_memory_map(struct vm_create_params *vcp) > len = LOWMEM_KB * 1024; > vcp->vcp_memranges[0].vmr_gpa = 0x0; > vcp->vcp_memranges[0].vmr_size = len; > + vcp->vcp_memranges[0].vmr_type = VM_MEM_RAM; > mem_bytes -= len; > > /* > @@ -913,12 +914,14 @@ create_memory_map(struct vm_create_params *vcp) > len = MB(1) - (LOWMEM_KB * 1024); > vcp->vcp_memranges[1].vmr_gpa = LOWMEM_KB * 1024; > vcp->vcp_memranges[1].vmr_size = len; > + vcp->vcp_memranges[1].vmr_type = VM_MEM_RESERVED; > mem_bytes -= len; > > /* If we have less than 2MB remaining, still create a 2nd BIOS area. 
*/ > if (mem_bytes <= MB(2)) { > vcp->vcp_memranges[2].vmr_gpa = VMM_PCI_MMIO_BAR_END; > vcp->vcp_memranges[2].vmr_size = MB(2); > + vcp->vcp_memranges[2].vmr_type = VM_MEM_RESERVED; > vcp->vcp_nmemranges = 3; > return; > } > @@ -939,18 +942,27 @@ create_memory_map(struct vm_create_params *vcp) > /* Third memory region: area above 1MB to MMIO region */ > vcp->vcp_memranges[2].vmr_gpa = MB(1); > vcp->vcp_memranges[2].vmr_size = above_1m; > + vcp->vcp_memranges[2].vmr_type = VM_MEM_RAM; > > - /* Fourth region: 2nd copy of BIOS above MMIO ending at 4GB */ > - vcp->vcp_memranges[3].vmr_gpa = VMM_PCI_MMIO_BAR_END + 1; > - vcp->vcp_memranges[3].vmr_size = MB(2); > + /* Fourth region: PCI MMIO range */ > + vcp->vcp_memranges[3].vmr_gpa = VMM_PCI_MMIO_BAR_BASE; > + vcp->vcp_memranges[3].vmr_size = VMM_PCI_MMIO_BAR_END - > + VMM_PCI_MMIO_BAR_BASE + 1; > + vcp->vcp_memranges[3].vmr_type = VM_MEM_MMIO; > > - /* Fifth region: any remainder above 4GB */ > + /* Fifth region: 2nd copy of BIOS above MMIO ending at 4GB */ > + vcp->vcp_memranges[4].vmr_gpa = VMM_PCI_MMIO_BAR_END + 1; > + vcp->vcp_memranges[4].vmr_size = MB(2); > + vcp->vcp_memranges[4].vmr_type = VM_MEM_RESERVED; > + > + /* Sixth region: any remainder above 4GB */ > if (above_4g > 0) { > - vcp->vcp_memranges[4].vmr_gpa = GB(4); > - vcp->vcp_memranges[4].vmr_size = above_4g; > + vcp->vcp_memranges[5].vmr_gpa = GB(4); > + vcp->vcp_memranges[5].vmr_size = above_4g; > + vcp->vcp_memranges[5].vmr_type = VM_MEM_RAM; > + vcp->vcp_nmemranges = 6; > + } else > vcp->vcp_nmemranges = 5; > - } else > - vcp->vcp_nmemranges = 4; > } > > /*