Dave Voutila <d...@sisu.io> writes:
> tech@, > > The below diff tweaks how vmd and vmm define memory ranges (adding a > "type" attribute) so we can properly build an e820 memory map to hand to > things like SeaBIOS or the OpenBSD ramdisk kernel (when direct booting > bsd.rd). > > Why do it? We've been carrying a few patches to SeaBIOS in the ports > tree to hack around how vmd articulates some memory range details. By > finally implementing a proper bios memory map table we can drop some of > those patches. (Diff to ports@ coming shortly.) > > Bonus is it cleans up how we were hacking a bios memory map for direct > booting ramdisk kernels. > > Note: the below diff *will* work with the current SeaBIOS > (vmm-firmware), so you do *not* need to build the port. > > You will, however, need to: > - build, install, & reboot into a new kernel > - make sure you update /usr/include/amd64/vmmvar.h with a copy of or > symlink to sys/arch/amd64/include/vmmvar.h > - rebuild & install vmctl > - rebuild & install vmd > > This should *not* result in any behavioral changes of current vmd > guests. If you notice any, especially guests failing to start, please > rebuild a kernel with VMM_DEBUG to help diagnose the regression. > Updated diff to fix some accounting issues with guest memory. (vmctl should report the correct max mem now.) As a result, it adds in an MMIO range type (previous diff counted that range towards guest mem, though we don't actually fault in virtual memory to represent it to the guest). This has the added benefit of removing more knowledge from vmm(4) of what an emulated machine looks like, i.e. why does it care what the pci mmio range is? vmd(8) is responsible for that. I did also remove the "multiple of 1M" requirement for guest memory. Since I transitioned things to bytes a while ago, there's no need to prohibit that. 
-dv diff refs/heads/master refs/heads/vmd-e820 commit - 9be741fe9857107e3610acb9a39e2972330b122d commit + ad422400e2f72c14c73d7f124f8b96d01d4ad4c5 blob - 3f7e0ce405ae3c6b0b4a787de341839886f97436 blob + d69293fcd5fd98315181eb0dd77b653601530e9d --- sys/arch/amd64/amd64/vmm.c +++ sys/arch/amd64/amd64/vmm.c @@ -1631,8 +1631,8 @@ vmx_remote_vmclear(struct cpu_info *ci, struct vcpu *v * The last physical address may not exceed VMM_MAX_VM_MEM_SIZE. * * Return Values: - * The total memory size in MB if the checks were successful - * 0: One of the memory ranges was invalid, or VMM_MAX_VM_MEM_SIZE was + * The total memory size in bytes if the checks were successful + * 0: One of the memory ranges was invalid or VMM_MAX_VM_MEM_SIZE was * exceeded */ size_t @@ -1643,21 +1643,27 @@ vm_create_check_mem_ranges(struct vm_create_params *vc const paddr_t maxgpa = VMM_MAX_VM_MEM_SIZE; if (vcp->vcp_nmemranges == 0 || - vcp->vcp_nmemranges > VMM_MAX_MEM_RANGES) + vcp->vcp_nmemranges > VMM_MAX_MEM_RANGES) { + DPRINTF("invalid number of guest memory ranges\n"); return (0); + } for (i = 0; i < vcp->vcp_nmemranges; i++) { vmr = &vcp->vcp_memranges[i]; /* Only page-aligned addresses and sizes are permitted */ if ((vmr->vmr_gpa & PAGE_MASK) || (vmr->vmr_va & PAGE_MASK) || - (vmr->vmr_size & PAGE_MASK) || vmr->vmr_size == 0) + (vmr->vmr_size & PAGE_MASK) || vmr->vmr_size == 0) { + DPRINTF("memory range %zu is not page aligned\n", i); return (0); + } /* Make sure that VMM_MAX_VM_MEM_SIZE is not exceeded */ if (vmr->vmr_gpa >= maxgpa || - vmr->vmr_size > maxgpa - vmr->vmr_gpa) + vmr->vmr_size > maxgpa - vmr->vmr_gpa) { + DPRINTF("exceeded max memory size\n"); return (0); + } /* * Make sure that all virtual addresses are within the address @@ -1667,39 +1673,29 @@ vm_create_check_mem_ranges(struct vm_create_params *vc */ if (vmr->vmr_va < VM_MIN_ADDRESS || vmr->vmr_va >= VM_MAXUSER_ADDRESS || - vmr->vmr_size >= VM_MAXUSER_ADDRESS - vmr->vmr_va) + vmr->vmr_size >= VM_MAXUSER_ADDRESS - vmr->vmr_va) 
{ + DPRINTF("guest va not within range or wraps\n"); return (0); + } /* - * Specifying ranges within the PCI MMIO space is forbidden. - * Disallow ranges that start inside the MMIO space: - * [VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END] - */ - if (vmr->vmr_gpa >= VMM_PCI_MMIO_BAR_BASE && - vmr->vmr_gpa <= VMM_PCI_MMIO_BAR_END) - return (0); - - /* - * ... and disallow ranges that end inside the MMIO space: - * (VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END] - */ - if (vmr->vmr_gpa + vmr->vmr_size > VMM_PCI_MMIO_BAR_BASE && - vmr->vmr_gpa + vmr->vmr_size <= VMM_PCI_MMIO_BAR_END) - return (0); - - /* * Make sure that guest physical memory ranges do not overlap * and that they are ascending. */ - if (i > 0 && pvmr->vmr_gpa + pvmr->vmr_size > vmr->vmr_gpa) + if (i > 0 && pvmr->vmr_gpa + pvmr->vmr_size > vmr->vmr_gpa) { + DPRINTF("guest range %zu overlaps or !ascending\n", i); return (0); + } - memsize += vmr->vmr_size; + /* + * No memory is mappable in MMIO ranges, so don't count towards + * the total guest memory size. + */ + if (vmr->vmr_type != VM_MEM_MMIO) + memsize += vmr->vmr_size; pvmr = vmr; } - if (memsize % (1024 * 1024) != 0) - return (0); return (memsize); } @@ -5633,11 +5629,6 @@ vmm_get_guest_memtype(struct vm *vm, paddr_t gpa) int i; struct vm_mem_range *vmr; - if (gpa >= VMM_PCI_MMIO_BAR_BASE && gpa <= VMM_PCI_MMIO_BAR_END) { - DPRINTF("guest mmio access @ 0x%llx\n", (uint64_t)gpa); - return (VMM_MEM_TYPE_MMIO); - } - /* XXX Use binary search? 
*/ for (i = 0; i < vm->vm_nmemranges; i++) { vmr = &vm->vm_memranges[i]; @@ -5649,8 +5640,11 @@ vmm_get_guest_memtype(struct vm *vm, paddr_t gpa) if (gpa < vmr->vmr_gpa) break; - if (gpa < vmr->vmr_gpa + vmr->vmr_size) + if (gpa < vmr->vmr_gpa + vmr->vmr_size) { + if (vmr->vmr_type == VM_MEM_MMIO) + return (VMM_MEM_TYPE_MMIO); return (VMM_MEM_TYPE_REGULAR); + } } DPRINTF("guest memtype @ 0x%llx unknown\n", (uint64_t)gpa); blob - 94feca154717c1e3016990ad260036cd79e29b65 blob + 45582d39a9cf7624216c3335e576979ef64aea32 --- sys/arch/amd64/include/vmmvar.h +++ sys/arch/amd64/include/vmmvar.h @@ -451,6 +451,10 @@ struct vm_mem_range { paddr_t vmr_gpa; vaddr_t vmr_va; size_t vmr_size; + int vmr_type; +#define VM_MEM_RAM 0 /* Presented as usable system memory. */ +#define VM_MEM_RESERVED 1 /* Reserved for BIOS, etc. */ +#define VM_MEM_MMIO 2 /* Special region for device mmio. */ }; /* blob - 4ec036912cafa154f4eb24ce757f0cb6e4c6bf4a blob + 9449adb74bffe88e56886f856d82f58fe9866cbb --- usr.sbin/vmd/fw_cfg.c +++ usr.sbin/vmd/fw_cfg.c @@ -16,6 +16,7 @@ */ #include <sys/types.h> #include <sys/uio.h> +#include <machine/biosvar.h> /* bios_memmap_t */ #include <machine/vmmvar.h> #include <stdlib.h> @@ -63,6 +64,8 @@ static int fw_cfg_select_file(uint16_t); static uint64_t fw_cfg_dma_addr; +static bios_memmap_t e820[VMM_MAX_MEM_RANGES]; + static int fw_cfg_select_file(uint16_t); static void fw_cfg_file_dir(void); @@ -71,7 +74,23 @@ fw_cfg_init(struct vmop_create_params *vmc) { const char *bootorder = NULL; unsigned int sd = 0; + size_t i, e820_len = 0; + /* Define e820 memory ranges. 
*/ + memset(&e820, 0, sizeof(e820)); + for (i = 0; i < vmc->vmc_params.vcp_nmemranges; i++) { + struct vm_mem_range *range = &vmc->vmc_params.vcp_memranges[i]; + bios_memmap_t *entry = &e820[i]; + entry->addr = range->vmr_gpa; + entry->size = range->vmr_size; + if (range->vmr_type == VM_MEM_RAM) + entry->type = BIOS_MAP_FREE; + else + entry->type = BIOS_MAP_RES; + e820_len += sizeof(bios_memmap_t); + } + fw_cfg_add_file("etc/e820", &e820, e820_len); + /* do not double print chars on serial port */ fw_cfg_add_file("etc/screen-and-debug", &sd, sizeof(sd)); blob - 651719542d28ce44bccb0487867ece7e72686606 blob + 565725e1385c98f473f35ae257f7b061167fb076 --- usr.sbin/vmd/loadfile_elf.c +++ usr.sbin/vmd/loadfile_elf.c @@ -334,38 +334,23 @@ create_bios_memmap(struct vm_create_params *vcp, bios_ static size_t create_bios_memmap(struct vm_create_params *vcp, bios_memmap_t *memmap) { - size_t i, n = 0, sz; - paddr_t gpa; + size_t i, n = 0; struct vm_mem_range *vmr; - for (i = 0; i < vcp->vcp_nmemranges; i++) { + for (i = 0; i < vcp->vcp_nmemranges; i++, n++) { vmr = &vcp->vcp_memranges[i]; - gpa = vmr->vmr_gpa; - sz = vmr->vmr_size; - - /* - * Make sure that we do not mark the ROM/video RAM area in the - * low memory as physcal memory available to the kernel. 
- */ - if (gpa < 0x100000 && gpa + sz > LOWMEM_KB * 1024) { - if (gpa >= LOWMEM_KB * 1024) - sz = 0; - else - sz = LOWMEM_KB * 1024 - gpa; - } - - if (sz != 0) { - memmap[n].addr = gpa; - memmap[n].size = sz; - memmap[n].type = 0x1; /* Type 1 : Normal memory */ - n++; - } + memmap[n].addr = vmr->vmr_gpa; + memmap[n].size = vmr->vmr_size; + if (vmr->vmr_type == VM_MEM_RAM) + memmap[n].type = BIOS_MAP_FREE; + else + memmap[n].type = BIOS_MAP_RES; } /* Null mem map entry to denote the end of the ranges */ memmap[n].addr = 0x0; memmap[n].size = 0x0; - memmap[n].type = 0x0; + memmap[n].type = BIOS_MAP_END; n++; return (n); blob - f1d9b97741c11f8cc4faa3f79658cd87135d2b29 blob + 4dfa3f920000d66c7ac53390009c7e280961c4f8 --- usr.sbin/vmd/vm.c +++ usr.sbin/vmd/vm.c @@ -899,6 +899,7 @@ create_memory_map(struct vm_create_params *vcp) len = LOWMEM_KB * 1024; vcp->vcp_memranges[0].vmr_gpa = 0x0; vcp->vcp_memranges[0].vmr_size = len; + vcp->vcp_memranges[0].vmr_type = VM_MEM_RAM; mem_bytes -= len; /* @@ -913,12 +914,14 @@ create_memory_map(struct vm_create_params *vcp) len = MB(1) - (LOWMEM_KB * 1024); vcp->vcp_memranges[1].vmr_gpa = LOWMEM_KB * 1024; vcp->vcp_memranges[1].vmr_size = len; + vcp->vcp_memranges[1].vmr_type = VM_MEM_RESERVED; mem_bytes -= len; /* If we have less than 2MB remaining, still create a 2nd BIOS area. 
*/ if (mem_bytes <= MB(2)) { vcp->vcp_memranges[2].vmr_gpa = VMM_PCI_MMIO_BAR_END; vcp->vcp_memranges[2].vmr_size = MB(2); + vcp->vcp_memranges[2].vmr_type = VM_MEM_RESERVED; vcp->vcp_nmemranges = 3; return; } @@ -939,18 +942,27 @@ create_memory_map(struct vm_create_params *vcp) /* Third memory region: area above 1MB to MMIO region */ vcp->vcp_memranges[2].vmr_gpa = MB(1); vcp->vcp_memranges[2].vmr_size = above_1m; + vcp->vcp_memranges[2].vmr_type = VM_MEM_RAM; - /* Fourth region: 2nd copy of BIOS above MMIO ending at 4GB */ - vcp->vcp_memranges[3].vmr_gpa = VMM_PCI_MMIO_BAR_END + 1; - vcp->vcp_memranges[3].vmr_size = MB(2); + /* Fourth region: PCI MMIO range */ + vcp->vcp_memranges[3].vmr_gpa = VMM_PCI_MMIO_BAR_BASE; + vcp->vcp_memranges[3].vmr_size = VMM_PCI_MMIO_BAR_END - + VMM_PCI_MMIO_BAR_BASE + 1; + vcp->vcp_memranges[3].vmr_type = VM_MEM_MMIO; - /* Fifth region: any remainder above 4GB */ + /* Fifth region: 2nd copy of BIOS above MMIO ending at 4GB */ + vcp->vcp_memranges[4].vmr_gpa = VMM_PCI_MMIO_BAR_END + 1; + vcp->vcp_memranges[4].vmr_size = MB(2); + vcp->vcp_memranges[4].vmr_type = VM_MEM_RESERVED; + + /* Sixth region: any remainder above 4GB */ if (above_4g > 0) { - vcp->vcp_memranges[4].vmr_gpa = GB(4); - vcp->vcp_memranges[4].vmr_size = above_4g; + vcp->vcp_memranges[5].vmr_gpa = GB(4); + vcp->vcp_memranges[5].vmr_size = above_4g; + vcp->vcp_memranges[5].vmr_type = VM_MEM_RAM; + vcp->vcp_nmemranges = 6; + } else vcp->vcp_nmemranges = 5; - } else - vcp->vcp_nmemranges = 4; } /*