*bump*... now looking for OKs!

Dave Voutila <d...@sisu.io> writes:

> Dave Voutila <d...@sisu.io> writes:
>
>> tech@,
>>
>> The below diff tweaks how vmd and vmm define memory ranges (adding a
>> "type" attribute) so we can properly build an e820 memory map to hand to
>> things like SeaBIOS or the OpenBSD ramdisk kernel (when direct booting
>> bsd.rd).
>>
>> Why do it? We've been carrying a few patches to SeaBIOS in the ports
>> tree to hack around how vmd articulates some memory range details. By
>> finally implementing a proper bios memory map table we can drop some of
>> those patches. (Diff to ports@ coming shortly.)
>>
>> Bonus is it cleans up how we were hacking a bios memory map for direct
>> booting ramdisk kernels.
>>
>> Note: the below diff *will* work with the current SeaBIOS
>> (vmm-firmware), so you do *not* need to build the port.
>>
>> You will, however, need to:
>> - build, install, & reboot into a new kernel
>> - make sure you update /usr/include/amd64/vmmvar.h with a copy of, or
>>   symlink to, sys/arch/amd64/include/vmmvar.h
>> - rebuild & install vmctl
>> - rebuild & install vmd
>>
>> This should *not* result in any behavioral changes of current vmd
>> guests. If you notice any, especially guests failing to start, please
>> rebuild a kernel with VMM_DEBUG to help diagnose the regression.
>>
>
> Updated diff to fix some accounting issues with guest memory. (vmctl
> should report the correct max mem now.)
>
> As a result, this adds an MMIO range type (the previous diff counted
> that range towards guest memory, even though we don't actually fault in
> virtual memory to represent it to the guest).
>
> This has the added benefit of removing more knowledge from vmm(4) of
> what an emulated machine looks like, i.e. why does it care what the pci
> mmio range is? vmd(8) is responsible for that.
>
> I also removed the "multiple of 1M" requirement for guest memory.
> Since I transitioned things to bytes a while ago, there is no need to
> prohibit sizes that are not a multiple of 1M.
>
> -dv
>
> diff refs/heads/master refs/heads/vmd-e820
> commit - 9be741fe9857107e3610acb9a39e2972330b122d
> commit + ad422400e2f72c14c73d7f124f8b96d01d4ad4c5
> blob - 3f7e0ce405ae3c6b0b4a787de341839886f97436
> blob + d69293fcd5fd98315181eb0dd77b653601530e9d
> --- sys/arch/amd64/amd64/vmm.c
> +++ sys/arch/amd64/amd64/vmm.c
> @@ -1631,8 +1631,8 @@ vmx_remote_vmclear(struct cpu_info *ci, struct vcpu *v
>   * The last physical address may not exceed VMM_MAX_VM_MEM_SIZE.
>   *
>   * Return Values:
> - *   The total memory size in MB if the checks were successful
> - *   0: One of the memory ranges was invalid, or VMM_MAX_VM_MEM_SIZE was
> + *   The total memory size in bytes if the checks were successful
> + *   0: One of the memory ranges was invalid or VMM_MAX_VM_MEM_SIZE was
>   *   exceeded
>   */
>  size_t
> @@ -1643,21 +1643,27 @@ vm_create_check_mem_ranges(struct vm_create_params *vc
>       const paddr_t maxgpa = VMM_MAX_VM_MEM_SIZE;
>
>       if (vcp->vcp_nmemranges == 0 ||
> -         vcp->vcp_nmemranges > VMM_MAX_MEM_RANGES)
> +         vcp->vcp_nmemranges > VMM_MAX_MEM_RANGES) {
> +             DPRINTF("invalid number of guest memory ranges\n");
>               return (0);
> +     }
>
>       for (i = 0; i < vcp->vcp_nmemranges; i++) {
>               vmr = &vcp->vcp_memranges[i];
>
>               /* Only page-aligned addresses and sizes are permitted */
>               if ((vmr->vmr_gpa & PAGE_MASK) || (vmr->vmr_va & PAGE_MASK) ||
> -                 (vmr->vmr_size & PAGE_MASK) || vmr->vmr_size == 0)
> +                 (vmr->vmr_size & PAGE_MASK) || vmr->vmr_size == 0) {
> +                     DPRINTF("memory range %zu is not page aligned\n", i);
>                       return (0);
> +             }
>
>               /* Make sure that VMM_MAX_VM_MEM_SIZE is not exceeded */
>               if (vmr->vmr_gpa >= maxgpa ||
> -                 vmr->vmr_size > maxgpa - vmr->vmr_gpa)
> +                 vmr->vmr_size > maxgpa - vmr->vmr_gpa) {
> +                     DPRINTF("exceeded max memory size\n");
>                       return (0);
> +             }
>
>               /*
>                * Make sure that all virtual addresses are within the address
> @@ -1667,39 +1673,29 @@ vm_create_check_mem_ranges(struct vm_create_params *vc
>                */
>               if (vmr->vmr_va < VM_MIN_ADDRESS ||
>                   vmr->vmr_va >= VM_MAXUSER_ADDRESS ||
> -                 vmr->vmr_size >= VM_MAXUSER_ADDRESS - vmr->vmr_va)
> +                 vmr->vmr_size >= VM_MAXUSER_ADDRESS - vmr->vmr_va) {
> +                     DPRINTF("guest va not within range or wraps\n");
>                       return (0);
> +             }
>
>               /*
> -              * Specifying ranges within the PCI MMIO space is forbidden.
> -              * Disallow ranges that start inside the MMIO space:
> -              * [VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END]
> -              */
> -             if (vmr->vmr_gpa >= VMM_PCI_MMIO_BAR_BASE &&
> -                 vmr->vmr_gpa <= VMM_PCI_MMIO_BAR_END)
> -                     return (0);
> -
> -             /*
> -              * ... and disallow ranges that end inside the MMIO space:
> -              * (VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END]
> -              */
> -             if (vmr->vmr_gpa + vmr->vmr_size > VMM_PCI_MMIO_BAR_BASE &&
> -                 vmr->vmr_gpa + vmr->vmr_size <= VMM_PCI_MMIO_BAR_END)
> -                     return (0);
> -
> -             /*
>                * Make sure that guest physical memory ranges do not overlap
>                * and that they are ascending.
>                */
> -             if (i > 0 && pvmr->vmr_gpa + pvmr->vmr_size > vmr->vmr_gpa)
> +             if (i > 0 && pvmr->vmr_gpa + pvmr->vmr_size > vmr->vmr_gpa) {
> +                     DPRINTF("guest range %zu overlaps or !ascending\n", i);
>                       return (0);
> +             }
>
> -             memsize += vmr->vmr_size;
> +             /*
> +              * No memory is mappable in MMIO ranges, so don't count towards
> +              * the total guest memory size.
> +              */
> +             if (vmr->vmr_type != VM_MEM_MMIO)
> +                     memsize += vmr->vmr_size;
>               pvmr = vmr;
>       }
>
> -     if (memsize % (1024 * 1024) != 0)
> -             return (0);
>       return (memsize);
>  }
>
> @@ -5633,11 +5629,6 @@ vmm_get_guest_memtype(struct vm *vm, paddr_t gpa)
>       int i;
>       struct vm_mem_range *vmr;
>
> -     if (gpa >= VMM_PCI_MMIO_BAR_BASE && gpa <= VMM_PCI_MMIO_BAR_END) {
> -             DPRINTF("guest mmio access @ 0x%llx\n", (uint64_t)gpa);
> -             return (VMM_MEM_TYPE_MMIO);
> -     }
> -
>       /* XXX Use binary search? */
>       for (i = 0; i < vm->vm_nmemranges; i++) {
>               vmr = &vm->vm_memranges[i];
> @@ -5649,8 +5640,11 @@ vmm_get_guest_memtype(struct vm *vm, paddr_t gpa)
>               if (gpa < vmr->vmr_gpa)
>                       break;
>
> -             if (gpa < vmr->vmr_gpa + vmr->vmr_size)
> +             if (gpa < vmr->vmr_gpa + vmr->vmr_size) {
> +                     if (vmr->vmr_type == VM_MEM_MMIO)
> +                             return (VMM_MEM_TYPE_MMIO);
>                       return (VMM_MEM_TYPE_REGULAR);
> +             }
>       }
>
>       DPRINTF("guest memtype @ 0x%llx unknown\n", (uint64_t)gpa);
> blob - 94feca154717c1e3016990ad260036cd79e29b65
> blob + 45582d39a9cf7624216c3335e576979ef64aea32
> --- sys/arch/amd64/include/vmmvar.h
> +++ sys/arch/amd64/include/vmmvar.h
> @@ -451,6 +451,10 @@ struct vm_mem_range {
>       paddr_t vmr_gpa;
>       vaddr_t vmr_va;
>       size_t  vmr_size;
> +     int     vmr_type;
> +#define VM_MEM_RAM           0       /* Presented as usable system memory. */
> +#define VM_MEM_RESERVED              1       /* Reserved for BIOS, etc. */
> +#define VM_MEM_MMIO          2       /* Special region for device mmio. */
>  };
>
>  /*
> blob - 4ec036912cafa154f4eb24ce757f0cb6e4c6bf4a
> blob + 9449adb74bffe88e56886f856d82f58fe9866cbb
> --- usr.sbin/vmd/fw_cfg.c
> +++ usr.sbin/vmd/fw_cfg.c
> @@ -16,6 +16,7 @@
>   */
>  #include <sys/types.h>
>  #include <sys/uio.h>
> +#include <machine/biosvar.h> /* bios_memmap_t */
>  #include <machine/vmmvar.h>
>
>  #include <stdlib.h>
> @@ -63,6 +64,8 @@ static int  fw_cfg_select_file(uint16_t);
>
>  static uint64_t      fw_cfg_dma_addr;
>
> +static bios_memmap_t e820[VMM_MAX_MEM_RANGES];
> +
>  static int   fw_cfg_select_file(uint16_t);
>  static void  fw_cfg_file_dir(void);
>
> @@ -71,7 +74,23 @@ fw_cfg_init(struct vmop_create_params *vmc)
>  {
>       const char *bootorder = NULL;
>       unsigned int sd = 0;
> +     size_t i, e820_len = 0;
>
> +     /* Define e820 memory ranges. */
> +     memset(&e820, 0, sizeof(e820));
> +     for (i = 0; i < vmc->vmc_params.vcp_nmemranges; i++) {
> +             struct vm_mem_range *range = &vmc->vmc_params.vcp_memranges[i];
> +             bios_memmap_t *entry = &e820[i];
> +             entry->addr = range->vmr_gpa;
> +             entry->size = range->vmr_size;
> +             if (range->vmr_type == VM_MEM_RAM)
> +                     entry->type = BIOS_MAP_FREE;
> +             else
> +                     entry->type = BIOS_MAP_RES;
> +             e820_len += sizeof(bios_memmap_t);
> +     }
> +     fw_cfg_add_file("etc/e820", &e820, e820_len);
> +
>       /* do not double print chars on serial port */
>       fw_cfg_add_file("etc/screen-and-debug", &sd, sizeof(sd));
>
> blob - 651719542d28ce44bccb0487867ece7e72686606
> blob + 565725e1385c98f473f35ae257f7b061167fb076
> --- usr.sbin/vmd/loadfile_elf.c
> +++ usr.sbin/vmd/loadfile_elf.c
> @@ -334,38 +334,23 @@ create_bios_memmap(struct vm_create_params *vcp, bios_
>  static size_t
>  create_bios_memmap(struct vm_create_params *vcp, bios_memmap_t *memmap)
>  {
> -     size_t i, n = 0, sz;
> -     paddr_t gpa;
> +     size_t i, n = 0;
>       struct vm_mem_range *vmr;
>
> -     for (i = 0; i < vcp->vcp_nmemranges; i++) {
> +     for (i = 0; i < vcp->vcp_nmemranges; i++, n++) {
>               vmr = &vcp->vcp_memranges[i];
> -             gpa = vmr->vmr_gpa;
> -             sz = vmr->vmr_size;
> -
> -             /*
> -              * Make sure that we do not mark the ROM/video RAM area in the
> -              * low memory as physcal memory available to the kernel.
> -              */
> -             if (gpa < 0x100000 && gpa + sz > LOWMEM_KB * 1024) {
> -                     if (gpa >= LOWMEM_KB * 1024)
> -                             sz = 0;
> -                     else
> -                             sz = LOWMEM_KB * 1024 - gpa;
> -             }
> -
> -             if (sz != 0) {
> -                     memmap[n].addr = gpa;
> -                     memmap[n].size = sz;
> -                     memmap[n].type = 0x1;   /* Type 1 : Normal memory */
> -                     n++;
> -             }
> +             memmap[n].addr = vmr->vmr_gpa;
> +             memmap[n].size = vmr->vmr_size;
> +             if (vmr->vmr_type == VM_MEM_RAM)
> +                     memmap[n].type = BIOS_MAP_FREE;
> +             else
> +                     memmap[n].type = BIOS_MAP_RES;
>       }
>
>       /* Null mem map entry to denote the end of the ranges */
>       memmap[n].addr = 0x0;
>       memmap[n].size = 0x0;
> -     memmap[n].type = 0x0;
> +     memmap[n].type = BIOS_MAP_END;
>       n++;
>
>       return (n);
> blob - f1d9b97741c11f8cc4faa3f79658cd87135d2b29
> blob + 4dfa3f920000d66c7ac53390009c7e280961c4f8
> --- usr.sbin/vmd/vm.c
> +++ usr.sbin/vmd/vm.c
> @@ -899,6 +899,7 @@ create_memory_map(struct vm_create_params *vcp)
>       len = LOWMEM_KB * 1024;
>       vcp->vcp_memranges[0].vmr_gpa = 0x0;
>       vcp->vcp_memranges[0].vmr_size = len;
> +     vcp->vcp_memranges[0].vmr_type = VM_MEM_RAM;
>       mem_bytes -= len;
>
>       /*
> @@ -913,12 +914,14 @@ create_memory_map(struct vm_create_params *vcp)
>       len = MB(1) - (LOWMEM_KB * 1024);
>       vcp->vcp_memranges[1].vmr_gpa = LOWMEM_KB * 1024;
>       vcp->vcp_memranges[1].vmr_size = len;
> +     vcp->vcp_memranges[1].vmr_type = VM_MEM_RESERVED;
>       mem_bytes -= len;
>
>       /* If we have less than 2MB remaining, still create a 2nd BIOS area. */
>       if (mem_bytes <= MB(2)) {
>               vcp->vcp_memranges[2].vmr_gpa = VMM_PCI_MMIO_BAR_END;
>               vcp->vcp_memranges[2].vmr_size = MB(2);
> +             vcp->vcp_memranges[2].vmr_type = VM_MEM_RESERVED;
>               vcp->vcp_nmemranges = 3;
>               return;
>       }
> @@ -939,18 +942,27 @@ create_memory_map(struct vm_create_params *vcp)
>       /* Third memory region: area above 1MB to MMIO region */
>       vcp->vcp_memranges[2].vmr_gpa = MB(1);
>       vcp->vcp_memranges[2].vmr_size = above_1m;
> +     vcp->vcp_memranges[2].vmr_type = VM_MEM_RAM;
>
> -     /* Fourth region: 2nd copy of BIOS above MMIO ending at 4GB */
> -     vcp->vcp_memranges[3].vmr_gpa = VMM_PCI_MMIO_BAR_END + 1;
> -     vcp->vcp_memranges[3].vmr_size = MB(2);
> +     /* Fourth region: PCI MMIO range */
> +     vcp->vcp_memranges[3].vmr_gpa = VMM_PCI_MMIO_BAR_BASE;
> +     vcp->vcp_memranges[3].vmr_size = VMM_PCI_MMIO_BAR_END -
> +         VMM_PCI_MMIO_BAR_BASE + 1;
> +     vcp->vcp_memranges[3].vmr_type = VM_MEM_MMIO;
>
> -     /* Fifth region: any remainder above 4GB */
> +     /* Fifth region: 2nd copy of BIOS above MMIO ending at 4GB */
> +     vcp->vcp_memranges[4].vmr_gpa = VMM_PCI_MMIO_BAR_END + 1;
> +     vcp->vcp_memranges[4].vmr_size = MB(2);
> +     vcp->vcp_memranges[4].vmr_type = VM_MEM_RESERVED;
> +
> +     /* Sixth region: any remainder above 4GB */
>       if (above_4g > 0) {
> -             vcp->vcp_memranges[4].vmr_gpa = GB(4);
> -             vcp->vcp_memranges[4].vmr_size = above_4g;
> +             vcp->vcp_memranges[5].vmr_gpa = GB(4);
> +             vcp->vcp_memranges[5].vmr_size = above_4g;
> +             vcp->vcp_memranges[5].vmr_type = VM_MEM_RAM;
> +             vcp->vcp_nmemranges = 6;
> +     } else
>               vcp->vcp_nmemranges = 5;
> -     } else
> -             vcp->vcp_nmemranges = 4;
>  }
>
>  /*

Reply via email to