Mischa <open...@mlst.nl> writes:

> Hi Dave,
>
> Great stuff!!
> Everything is patched, build and booted.
>
> What is the best way to test this?

Start guests as usual. I'd say the only thing definitively to manually
check is that they see the same amount of physical memory as before the
patch.

> Start a bunch of VMs with bsd.rd? Does this still need to be a
> decompressed bsd.rd?

Booting compressed bsd.rd's has been working for awhile now, so no need
to decompress them, btw. If you boot a bsd.rd, it's exercising the
changes to loadfile_elf.c, which is important to test.

>
> Mischa
>
> On 2022-12-10 23:51, Dave Voutila wrote:
>> tech@,
>> The below diff tweaks how vmd and vmm define memory ranges (adding a
>> "type" attribute) so we can properly build an e820 memory map to
>> hand to
>> things like SeaBIOS or the OpenBSD ramdisk kernel (when direct booting
>> bsd.rd).
>> Why do it? We've been carrying a few patches to SeaBIOS in the ports
>> tree to hack around how vmd articulates some memory range details. By
>> finally implementing a proper bios memory map table we can drop some of
>> those patches. (Diff to ports@ coming shortly.)
>> Bonus is it cleans up how we were hacking a bios memory map for
>> direct
>> booting ramdisk kernels.
>> Note: the below diff *will* work with the current SeaBIOS
>> (vmm-firmware), so you do *not* need to build the port.
>> You will, however, need to:
>> - build, install, & reboot into a new kernel
>> - make sure you update /usr/include/amd64/vmmvar.h with a copy of
>>   symlink to sys/arch/amd64/include/vmmvar.h
>> - rebuild & install vmctl
>> - rebuild & install vmd
>> This should *not* result in any behavioral changes of current vmd
>> guests. If you notice any, especially guests failing to start, please
>> rebuild a kernel with VMM_DEBUG to help diagnose the regression.
>> -dv
>> diff refs/heads/master refs/heads/vmd-e820
>> commit - a96642fb40af450c6576e205fab247cdbce0b5ed
>> commit + f3cb01998127d200e95ff9984a7503eb16c2a8d8
>> blob - 3f7e0ce405ae3c6b0b4a787de341839886f97436
>> blob + f2a464217838d3f0a50e4131b5b074b315e490fb
>> --- sys/arch/amd64/amd64/vmm.c
>> +++ sys/arch/amd64/amd64/vmm.c
>> @@ -1643,21 +1643,27 @@ vm_create_check_mem_ranges(struct
>> vm_create_params *vc
>>      const paddr_t maxgpa = VMM_MAX_VM_MEM_SIZE;
>>      if (vcp->vcp_nmemranges == 0 ||
>> -        vcp->vcp_nmemranges > VMM_MAX_MEM_RANGES)
>> +        vcp->vcp_nmemranges > VMM_MAX_MEM_RANGES) {
>> +            DPRINTF("invalid number of guest memory ranges\n");
>>              return (0);
>> +    }
>>      for (i = 0; i < vcp->vcp_nmemranges; i++) {
>>              vmr = &vcp->vcp_memranges[i];
>>              /* Only page-aligned addresses and sizes are permitted
>> */
>>              if ((vmr->vmr_gpa & PAGE_MASK) || (vmr->vmr_va & PAGE_MASK) ||
>> -                (vmr->vmr_size & PAGE_MASK) || vmr->vmr_size == 0)
>> +                (vmr->vmr_size & PAGE_MASK) || vmr->vmr_size == 0) {
>> +                    DPRINTF("memory range %zu is not page aligned\n", i);
>>                      return (0);
>> +            }
>>              /* Make sure that VMM_MAX_VM_MEM_SIZE is not exceeded
>> */
>>              if (vmr->vmr_gpa >= maxgpa ||
>> -                vmr->vmr_size > maxgpa - vmr->vmr_gpa)
>> +                vmr->vmr_size > maxgpa - vmr->vmr_gpa) {
>> +                    DPRINTF("exceeded max memory size\n");
>>                      return (0);
>> +            }
>>              /*
>>               * Make sure that all virtual addresses are within the address
>> @@ -1667,39 +1673,55 @@ vm_create_check_mem_ranges(struct
>> vm_create_params *vc
>>               */
>>              if (vmr->vmr_va < VM_MIN_ADDRESS ||
>>                  vmr->vmr_va >= VM_MAXUSER_ADDRESS ||
>> -                vmr->vmr_size >= VM_MAXUSER_ADDRESS - vmr->vmr_va)
>> +                vmr->vmr_size >= VM_MAXUSER_ADDRESS - vmr->vmr_va) {
>> +                    DPRINTF("guest va not within range or wraps\n");
>>                      return (0);
>> +            }
>>              /*
>>               * Specifying ranges within the PCI MMIO space is forbidden.
>>               * Disallow ranges that start inside the MMIO space:
>>               * [VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END]
>>               */
>> -            if (vmr->vmr_gpa >= VMM_PCI_MMIO_BAR_BASE &&
>> -                vmr->vmr_gpa <= VMM_PCI_MMIO_BAR_END)
>> +            if (vmr->vmr_type == VM_MEM_RAM &&
>> +                vmr->vmr_gpa >= VMM_PCI_MMIO_BAR_BASE &&
>> +                vmr->vmr_gpa <= VMM_PCI_MMIO_BAR_END) {
>> +                    DPRINTF("guest RAM range %zu cannot being in mmio range"
>> +                        " (gpa=0x%lx)\n", i, vmr->vmr_gpa);
>>                      return (0);
>> +            }
>>              /*
>>               * ... and disallow ranges that end inside the MMIO space:
>>               * (VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END]
>>               */
>> -            if (vmr->vmr_gpa + vmr->vmr_size > VMM_PCI_MMIO_BAR_BASE &&
>> -                vmr->vmr_gpa + vmr->vmr_size <= VMM_PCI_MMIO_BAR_END)
>> +            if (vmr->vmr_type == VM_MEM_RAM &&
>> +                vmr->vmr_gpa + vmr->vmr_size > VMM_PCI_MMIO_BAR_BASE &&
>> +                vmr->vmr_gpa + vmr->vmr_size <= VMM_PCI_MMIO_BAR_END) {
>> +                    DPRINTF("guest RAM range %zu cannot end in mmio range"
>> +                        " (gpa=0x%lx, sz=0x%lx)\n", i, vmr->vmr_gpa,
>> +                        vmr->vmr_size);
>>                      return (0);
>> +            }
>>              /*
>>               * Make sure that guest physical memory ranges do not overlap
>>               * and that they are ascending.
>>               */
>> -            if (i > 0 && pvmr->vmr_gpa + pvmr->vmr_size > vmr->vmr_gpa)
>> +            if (i > 0 && pvmr->vmr_gpa + pvmr->vmr_size > vmr->vmr_gpa) {
>> +                    DPRINTF("guest range %zu overlaps or !ascending\n", i);
>>                      return (0);
>> +            }
>>              memsize += vmr->vmr_size;
>>              pvmr = vmr;
>>      }
>> -    if (memsize % (1024 * 1024) != 0)
>> +    if (memsize % (1024 * 1024) != 0) {
>> +            DPRINTF("memory size not a multiple of 1MB\n");
>>              return (0);
>> +    }
>> +
>>      return (memsize);
>>  }
>> blob - 94feca154717c1e3016990ad260036cd79e29b65
>> blob + 2c57f10b9340e8a779f50bee18d235a299721571
>> --- sys/arch/amd64/include/vmmvar.h
>> +++ sys/arch/amd64/include/vmmvar.h
>> @@ -451,6 +451,9 @@ struct vm_mem_range {
>>      paddr_t vmr_gpa;
>>      vaddr_t vmr_va;
>>      size_t  vmr_size;
>> +    int     vmr_type;
>> +#define VM_MEM_RAM                  0
>> +#define VM_MEM_RESERVED                     1
>>  };
>>  /*
>> blob - 4ec036912cafa154f4eb24ce757f0cb6e4c6bf4a
>> blob + eb0bea236ed0d6c4d68f6699eb6720ef8fca296c
>> --- usr.sbin/vmd/fw_cfg.c
>> +++ usr.sbin/vmd/fw_cfg.c
>> @@ -16,6 +16,7 @@
>>   */
>>  #include <sys/types.h>
>>  #include <sys/uio.h>
>> +#include <machine/biosvar.h>        /* bios_memmap_t */
>>  #include <machine/vmmvar.h>
>>  #include <stdlib.h>
>> @@ -63,6 +64,8 @@ static int fw_cfg_select_file(uint16_t);
>>  static uint64_t     fw_cfg_dma_addr;
>> +static bios_memmap_t e820[VMM_MAX_MEM_RANGES];
>> +
>>  static int  fw_cfg_select_file(uint16_t);
>>  static void fw_cfg_file_dir(void);
>> @@ -71,7 +74,27 @@ fw_cfg_init(struct vmop_create_params *vmc)
>>  {
>>      const char *bootorder = NULL;
>>      unsigned int sd = 0;
>> +    size_t i, e820_len = 0;
>> +    /* Define e820 memory ranges. */
>> +    memset(&e820, 0, sizeof(e820));
>> +    for (i = 0; i < vmc->vmc_params.vcp_nmemranges; i++) {
>> +            struct vm_mem_range *range = &vmc->vmc_params.vcp_memranges[i];
>> +            bios_memmap_t *entry = &e820[i];
>> +
>> +            entry->addr = range->vmr_gpa;
>> +            entry->size = range->vmr_size;
>> +            if (range->vmr_type == VM_MEM_RAM)
>> +                    entry->type = BIOS_MAP_FREE;
>> +            else if (range->vmr_type == VM_MEM_RESERVED)
>> +                    entry->type = BIOS_MAP_RES;
>> +            else
>> +                    fatalx("undefined memory type %d", entry->type);
>> +
>> +            e820_len += sizeof(bios_memmap_t);
>> +    }
>> +    fw_cfg_add_file("etc/e820", &e820, e820_len);
>> +
>>      /* do not double print chars on serial port */
>>      fw_cfg_add_file("etc/screen-and-debug", &sd, sizeof(sd));
>> blob - 651719542d28ce44bccb0487867ece7e72686606
>> blob + b7f79eb9e140073f75563a6dcb5fdad3cb2b2d22
>> --- usr.sbin/vmd/loadfile_elf.c
>> +++ usr.sbin/vmd/loadfile_elf.c
>> @@ -334,38 +334,26 @@ create_bios_memmap(struct vm_create_params
>> *vcp, bios_
>>  static size_t
>>  create_bios_memmap(struct vm_create_params *vcp, bios_memmap_t
>> *memmap)
>>  {
>> -    size_t i, n = 0, sz;
>> -    paddr_t gpa;
>> +    size_t i, n = 0;
>>      struct vm_mem_range *vmr;
>> -    for (i = 0; i < vcp->vcp_nmemranges; i++) {
>> +    for (i = 0; i < vcp->vcp_nmemranges; i++, n++) {
>>              vmr = &vcp->vcp_memranges[i];
>> -            gpa = vmr->vmr_gpa;
>> -            sz = vmr->vmr_size;
>> -
>> -            /*
>> -             * Make sure that we do not mark the ROM/video RAM area in the
>> -             * low memory as physcal memory available to the kernel.
>> -             */
>> -            if (gpa < 0x100000 && gpa + sz > LOWMEM_KB * 1024) {
>> -                    if (gpa >= LOWMEM_KB * 1024)
>> -                            sz = 0;
>> -                    else
>> -                            sz = LOWMEM_KB * 1024 - gpa;
>> -            }
>> -
>> -            if (sz != 0) {
>> -                    memmap[n].addr = gpa;
>> -                    memmap[n].size = sz;
>> -                    memmap[n].type = 0x1;   /* Type 1 : Normal memory */
>> -                    n++;
>> -            }
>> +            memmap[n].addr = vmr->vmr_gpa;
>> +            memmap[n].size = vmr->vmr_size;
>> +            if (vmr->vmr_type == VM_MEM_RAM)
>> +                    memmap[n].type = BIOS_MAP_FREE;
>> +            else if (vmr->vmr_type == VM_MEM_RESERVED)
>> +                    memmap[n].type = BIOS_MAP_RES;
>> +            else
>> +                    fatalx("%s: invalid vm memory range type %d\n",
>> +                        __func__, vmr->vmr_type);
>>      }
>>      /* Null mem map entry to denote the end of the ranges */
>>      memmap[n].addr = 0x0;
>>      memmap[n].size = 0x0;
>> -    memmap[n].type = 0x0;
>> +    memmap[n].type = BIOS_MAP_END;
>>      n++;
>>      return (n);
>> blob - f1d9b97741c11f8cc4faa3f79658cd87135d2b29
>> blob + 7a1b3bb39cfd4651b076bf5c5e74012bdd11754e
>> --- usr.sbin/vmd/vm.c
>> +++ usr.sbin/vmd/vm.c
>> @@ -899,6 +899,7 @@ create_memory_map(struct vm_create_params *vcp)
>>      len = LOWMEM_KB * 1024;
>>      vcp->vcp_memranges[0].vmr_gpa = 0x0;
>>      vcp->vcp_memranges[0].vmr_size = len;
>> +    vcp->vcp_memranges[0].vmr_type = VM_MEM_RAM;
>>      mem_bytes -= len;
>>      /*
>> @@ -913,12 +914,14 @@ create_memory_map(struct vm_create_params *vcp)
>>      len = MB(1) - (LOWMEM_KB * 1024);
>>      vcp->vcp_memranges[1].vmr_gpa = LOWMEM_KB * 1024;
>>      vcp->vcp_memranges[1].vmr_size = len;
>> +    vcp->vcp_memranges[1].vmr_type = VM_MEM_RESERVED;
>>      mem_bytes -= len;
>>      /* If we have less than 2MB remaining, still create a 2nd BIOS
>> area. */
>>      if (mem_bytes <= MB(2)) {
>>              vcp->vcp_memranges[2].vmr_gpa = VMM_PCI_MMIO_BAR_END;
>>              vcp->vcp_memranges[2].vmr_size = MB(2);
>> +            vcp->vcp_memranges[2].vmr_type = VM_MEM_RESERVED;
>>              vcp->vcp_nmemranges = 3;
>>              return;
>>      }
>> @@ -939,18 +942,27 @@ create_memory_map(struct vm_create_params *vcp)
>>      /* Third memory region: area above 1MB to MMIO region */
>>      vcp->vcp_memranges[2].vmr_gpa = MB(1);
>>      vcp->vcp_memranges[2].vmr_size = above_1m;
>> +    vcp->vcp_memranges[2].vmr_type = VM_MEM_RAM;
>> -    /* Fourth region: 2nd copy of BIOS above MMIO ending at 4GB */
>> -    vcp->vcp_memranges[3].vmr_gpa = VMM_PCI_MMIO_BAR_END + 1;
>> -    vcp->vcp_memranges[3].vmr_size = MB(2);
>> +    /* Fourth region: PCI MMIO range */
>> +    vcp->vcp_memranges[3].vmr_gpa = VMM_PCI_MMIO_BAR_BASE;
>> +    vcp->vcp_memranges[3].vmr_size = VMM_PCI_MMIO_BAR_END -
>> +        VMM_PCI_MMIO_BAR_BASE + 1;
>> +    vcp->vcp_memranges[3].vmr_type = VM_MEM_RESERVED;
>> -    /* Fifth region: any remainder above 4GB */
>> +    /* Fifth region: 2nd copy of BIOS above MMIO ending at 4GB */
>> +    vcp->vcp_memranges[4].vmr_gpa = VMM_PCI_MMIO_BAR_END + 1;
>> +    vcp->vcp_memranges[4].vmr_size = MB(2);
>> +    vcp->vcp_memranges[4].vmr_type = VM_MEM_RESERVED;
>> +
>> +    /* Sixth region: any remainder above 4GB */
>>      if (above_4g > 0) {
>> -            vcp->vcp_memranges[4].vmr_gpa = GB(4);
>> -            vcp->vcp_memranges[4].vmr_size = above_4g;
>> +            vcp->vcp_memranges[5].vmr_gpa = GB(4);
>> +            vcp->vcp_memranges[5].vmr_size = above_4g;
>> +            vcp->vcp_memranges[5].vmr_type = VM_MEM_RAM;
>> +            vcp->vcp_nmemranges = 6;
>> +    } else
>>              vcp->vcp_nmemranges = 5;
>> -    } else
>> -            vcp->vcp_nmemranges = 4;
>>  }
>>  /*

Reply via email to