tech@,

The below diff tweaks how vmd and vmm define memory ranges (adding a
"type" attribute) so we can properly build an e820 memory map to hand to
things like SeaBIOS or the OpenBSD ramdisk kernel (when direct booting
bsd.rd).

Why do it? We've been carrying a few patches to SeaBIOS in the ports
tree to hack around how vmd articulates some memory range details. By
finally implementing a proper bios memory map table we can drop some of
those patches. (Diff to ports@ coming shortly.)

Bonus is it cleans up how we were hacking a bios memory map for direct
booting ramdisk kernels.

Note: the below diff *will* work with the current SeaBIOS
(vmm-firmware), so you do *not* need to build the port.

You will, however, need to:
- build, install, & reboot into a new kernel
- make sure you update /usr/include/amd64/vmmvar.h with a copy of, or
  symlink to, sys/arch/amd64/include/vmmvar.h
- rebuild & install vmctl
- rebuild & install vmd

This should *not* result in any behavioral changes for current vmd
guests. If you notice any, especially guests failing to start, please
rebuild a kernel with VMM_DEBUG to help diagnose the regression.

-dv

diff refs/heads/master refs/heads/vmd-e820
commit - a96642fb40af450c6576e205fab247cdbce0b5ed
commit + f3cb01998127d200e95ff9984a7503eb16c2a8d8
blob - 3f7e0ce405ae3c6b0b4a787de341839886f97436
blob + f2a464217838d3f0a50e4131b5b074b315e490fb
--- sys/arch/amd64/amd64/vmm.c
+++ sys/arch/amd64/amd64/vmm.c
@@ -1643,21 +1643,27 @@ vm_create_check_mem_ranges(struct vm_create_params *vc
        const paddr_t maxgpa = VMM_MAX_VM_MEM_SIZE;

        if (vcp->vcp_nmemranges == 0 ||
-           vcp->vcp_nmemranges > VMM_MAX_MEM_RANGES)
+           vcp->vcp_nmemranges > VMM_MAX_MEM_RANGES) {
+               DPRINTF("invalid number of guest memory ranges\n");
                return (0);
+       }

        for (i = 0; i < vcp->vcp_nmemranges; i++) {
                vmr = &vcp->vcp_memranges[i];

                /* Only page-aligned addresses and sizes are permitted */
                if ((vmr->vmr_gpa & PAGE_MASK) || (vmr->vmr_va & PAGE_MASK) ||
-                   (vmr->vmr_size & PAGE_MASK) || vmr->vmr_size == 0)
+                   (vmr->vmr_size & PAGE_MASK) || vmr->vmr_size == 0) {
+                       DPRINTF("memory range %zu is not page aligned\n", i);
                        return (0);
+               }

                /* Make sure that VMM_MAX_VM_MEM_SIZE is not exceeded */
                if (vmr->vmr_gpa >= maxgpa ||
-                   vmr->vmr_size > maxgpa - vmr->vmr_gpa)
+                   vmr->vmr_size > maxgpa - vmr->vmr_gpa) {
+                       DPRINTF("exceeded max memory size\n");
                        return (0);
+               }

                /*
                 * Make sure that all virtual addresses are within the address
@@ -1667,39 +1673,55 @@ vm_create_check_mem_ranges(struct vm_create_params *vc
                 */
                if (vmr->vmr_va < VM_MIN_ADDRESS ||
                    vmr->vmr_va >= VM_MAXUSER_ADDRESS ||
-                   vmr->vmr_size >= VM_MAXUSER_ADDRESS - vmr->vmr_va)
+                   vmr->vmr_size >= VM_MAXUSER_ADDRESS - vmr->vmr_va) {
+                       DPRINTF("guest va not within range or wraps\n");
                        return (0);
+               }

                /*
                 * Specifying ranges within the PCI MMIO space is forbidden.
                 * Disallow ranges that start inside the MMIO space:
                 * [VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END]
                 */
-               if (vmr->vmr_gpa >= VMM_PCI_MMIO_BAR_BASE &&
-                   vmr->vmr_gpa <= VMM_PCI_MMIO_BAR_END)
+               if (vmr->vmr_type == VM_MEM_RAM &&
+                   vmr->vmr_gpa >= VMM_PCI_MMIO_BAR_BASE &&
+                   vmr->vmr_gpa <= VMM_PCI_MMIO_BAR_END) {
+                       DPRINTF("guest RAM range %zu cannot being in mmio range"
+                           " (gpa=0x%lx)\n", i, vmr->vmr_gpa);
                        return (0);
+               }

                /*
                 * ... and disallow ranges that end inside the MMIO space:
                 * (VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END]
                 */
-               if (vmr->vmr_gpa + vmr->vmr_size > VMM_PCI_MMIO_BAR_BASE &&
-                   vmr->vmr_gpa + vmr->vmr_size <= VMM_PCI_MMIO_BAR_END)
+               if (vmr->vmr_type == VM_MEM_RAM &&
+                   vmr->vmr_gpa + vmr->vmr_size > VMM_PCI_MMIO_BAR_BASE &&
+                   vmr->vmr_gpa + vmr->vmr_size <= VMM_PCI_MMIO_BAR_END) {
+                       DPRINTF("guest RAM range %zu cannot end in mmio range"
+                           " (gpa=0x%lx, sz=0x%lx)\n", i, vmr->vmr_gpa,
+                           vmr->vmr_size);
                        return (0);
+               }

                /*
                 * Make sure that guest physical memory ranges do not overlap
                 * and that they are ascending.
                 */
-               if (i > 0 && pvmr->vmr_gpa + pvmr->vmr_size > vmr->vmr_gpa)
+               if (i > 0 && pvmr->vmr_gpa + pvmr->vmr_size > vmr->vmr_gpa) {
+                       DPRINTF("guest range %zu overlaps or !ascending\n", i);
                        return (0);
+               }

                memsize += vmr->vmr_size;
                pvmr = vmr;
        }

-       if (memsize % (1024 * 1024) != 0)
+       if (memsize % (1024 * 1024) != 0) {
+               DPRINTF("memory size not a multiple of 1MB\n");
                return (0);
+       }
+
        return (memsize);
 }

blob - 94feca154717c1e3016990ad260036cd79e29b65
blob + 2c57f10b9340e8a779f50bee18d235a299721571
--- sys/arch/amd64/include/vmmvar.h
+++ sys/arch/amd64/include/vmmvar.h
@@ -451,6 +451,9 @@ struct vm_mem_range {
        paddr_t vmr_gpa;
        vaddr_t vmr_va;
        size_t  vmr_size;
+       int     vmr_type;
+#define VM_MEM_RAM                     0
+#define VM_MEM_RESERVED                        1
 };

 /*
blob - 4ec036912cafa154f4eb24ce757f0cb6e4c6bf4a
blob + eb0bea236ed0d6c4d68f6699eb6720ef8fca296c
--- usr.sbin/vmd/fw_cfg.c
+++ usr.sbin/vmd/fw_cfg.c
@@ -16,6 +16,7 @@
  */
 #include <sys/types.h>
 #include <sys/uio.h>
+#include <machine/biosvar.h>   /* bios_memmap_t */
 #include <machine/vmmvar.h>

 #include <stdlib.h>
@@ -63,6 +64,8 @@ static int    fw_cfg_select_file(uint16_t);

 static uint64_t        fw_cfg_dma_addr;

+static bios_memmap_t e820[VMM_MAX_MEM_RANGES];
+
 static int     fw_cfg_select_file(uint16_t);
 static void    fw_cfg_file_dir(void);

@@ -71,7 +74,27 @@ fw_cfg_init(struct vmop_create_params *vmc)
 {
        const char *bootorder = NULL;
        unsigned int sd = 0;
+       size_t i, e820_len = 0;

+       /* Define e820 memory ranges. */
+       memset(&e820, 0, sizeof(e820));
+       for (i = 0; i < vmc->vmc_params.vcp_nmemranges; i++) {
+               struct vm_mem_range *range = &vmc->vmc_params.vcp_memranges[i];
+               bios_memmap_t *entry = &e820[i];
+
+               entry->addr = range->vmr_gpa;
+               entry->size = range->vmr_size;
+               if (range->vmr_type == VM_MEM_RAM)
+                       entry->type = BIOS_MAP_FREE;
+               else if (range->vmr_type == VM_MEM_RESERVED)
+                       entry->type = BIOS_MAP_RES;
+               else
+                       fatalx("undefined memory type %d", entry->type);
+
+               e820_len += sizeof(bios_memmap_t);
+       }
+       fw_cfg_add_file("etc/e820", &e820, e820_len);
+
        /* do not double print chars on serial port */
        fw_cfg_add_file("etc/screen-and-debug", &sd, sizeof(sd));

blob - 651719542d28ce44bccb0487867ece7e72686606
blob + b7f79eb9e140073f75563a6dcb5fdad3cb2b2d22
--- usr.sbin/vmd/loadfile_elf.c
+++ usr.sbin/vmd/loadfile_elf.c
@@ -334,38 +334,26 @@ create_bios_memmap(struct vm_create_params *vcp, bios_
 static size_t
 create_bios_memmap(struct vm_create_params *vcp, bios_memmap_t *memmap)
 {
-       size_t i, n = 0, sz;
-       paddr_t gpa;
+       size_t i, n = 0;
        struct vm_mem_range *vmr;

-       for (i = 0; i < vcp->vcp_nmemranges; i++) {
+       for (i = 0; i < vcp->vcp_nmemranges; i++, n++) {
                vmr = &vcp->vcp_memranges[i];
-               gpa = vmr->vmr_gpa;
-               sz = vmr->vmr_size;
-
-               /*
-                * Make sure that we do not mark the ROM/video RAM area in the
-                * low memory as physcal memory available to the kernel.
-                */
-               if (gpa < 0x100000 && gpa + sz > LOWMEM_KB * 1024) {
-                       if (gpa >= LOWMEM_KB * 1024)
-                               sz = 0;
-                       else
-                               sz = LOWMEM_KB * 1024 - gpa;
-               }
-
-               if (sz != 0) {
-                       memmap[n].addr = gpa;
-                       memmap[n].size = sz;
-                       memmap[n].type = 0x1;   /* Type 1 : Normal memory */
-                       n++;
-               }
+               memmap[n].addr = vmr->vmr_gpa;
+               memmap[n].size = vmr->vmr_size;
+               if (vmr->vmr_type == VM_MEM_RAM)
+                       memmap[n].type = BIOS_MAP_FREE;
+               else if (vmr->vmr_type == VM_MEM_RESERVED)
+                       memmap[n].type = BIOS_MAP_RES;
+               else
+                       fatalx("%s: invalid vm memory range type %d\n",
+                           __func__, vmr->vmr_type);
        }

        /* Null mem map entry to denote the end of the ranges */
        memmap[n].addr = 0x0;
        memmap[n].size = 0x0;
-       memmap[n].type = 0x0;
+       memmap[n].type = BIOS_MAP_END;
        n++;

        return (n);
blob - f1d9b97741c11f8cc4faa3f79658cd87135d2b29
blob + 7a1b3bb39cfd4651b076bf5c5e74012bdd11754e
--- usr.sbin/vmd/vm.c
+++ usr.sbin/vmd/vm.c
@@ -899,6 +899,7 @@ create_memory_map(struct vm_create_params *vcp)
        len = LOWMEM_KB * 1024;
        vcp->vcp_memranges[0].vmr_gpa = 0x0;
        vcp->vcp_memranges[0].vmr_size = len;
+       vcp->vcp_memranges[0].vmr_type = VM_MEM_RAM;
        mem_bytes -= len;

        /*
@@ -913,12 +914,14 @@ create_memory_map(struct vm_create_params *vcp)
        len = MB(1) - (LOWMEM_KB * 1024);
        vcp->vcp_memranges[1].vmr_gpa = LOWMEM_KB * 1024;
        vcp->vcp_memranges[1].vmr_size = len;
+       vcp->vcp_memranges[1].vmr_type = VM_MEM_RESERVED;
        mem_bytes -= len;

        /* If we have less than 2MB remaining, still create a 2nd BIOS area. */
        if (mem_bytes <= MB(2)) {
                vcp->vcp_memranges[2].vmr_gpa = VMM_PCI_MMIO_BAR_END;
                vcp->vcp_memranges[2].vmr_size = MB(2);
+               vcp->vcp_memranges[2].vmr_type = VM_MEM_RESERVED;
                vcp->vcp_nmemranges = 3;
                return;
        }
@@ -939,18 +942,27 @@ create_memory_map(struct vm_create_params *vcp)
        /* Third memory region: area above 1MB to MMIO region */
        vcp->vcp_memranges[2].vmr_gpa = MB(1);
        vcp->vcp_memranges[2].vmr_size = above_1m;
+       vcp->vcp_memranges[2].vmr_type = VM_MEM_RAM;

-       /* Fourth region: 2nd copy of BIOS above MMIO ending at 4GB */
-       vcp->vcp_memranges[3].vmr_gpa = VMM_PCI_MMIO_BAR_END + 1;
-       vcp->vcp_memranges[3].vmr_size = MB(2);
+       /* Fourth region: PCI MMIO range */
+       vcp->vcp_memranges[3].vmr_gpa = VMM_PCI_MMIO_BAR_BASE;
+       vcp->vcp_memranges[3].vmr_size = VMM_PCI_MMIO_BAR_END -
+           VMM_PCI_MMIO_BAR_BASE + 1;
+       vcp->vcp_memranges[3].vmr_type = VM_MEM_RESERVED;

-       /* Fifth region: any remainder above 4GB */
+       /* Fifth region: 2nd copy of BIOS above MMIO ending at 4GB */
+       vcp->vcp_memranges[4].vmr_gpa = VMM_PCI_MMIO_BAR_END + 1;
+       vcp->vcp_memranges[4].vmr_size = MB(2);
+       vcp->vcp_memranges[4].vmr_type = VM_MEM_RESERVED;
+
+       /* Sixth region: any remainder above 4GB */
        if (above_4g > 0) {
-               vcp->vcp_memranges[4].vmr_gpa = GB(4);
-               vcp->vcp_memranges[4].vmr_size = above_4g;
+               vcp->vcp_memranges[5].vmr_gpa = GB(4);
+               vcp->vcp_memranges[5].vmr_size = above_4g;
+               vcp->vcp_memranges[5].vmr_type = VM_MEM_RAM;
+               vcp->vcp_nmemranges = 6;
+       } else
                vcp->vcp_nmemranges = 5;
-       } else
-               vcp->vcp_nmemranges = 4;
 }

 /*

Reply via email to