On Tue, Oct 23, 2018 at 09:53:48PM +0000, Adam Steen wrote:
> Hi tech
>
> The following diff allows vmd to specify which ports it can handle
> or fix "XXX something better than a hardcoded list here, maybe configure via
> vmd via the device list in vm create params?"
>
> There are currently two implementation of bsearch in the kernel and this patch
> would add a third, I think these should be consolidated, but i didn't know
> where to put the new function.
> the implementations are in:
> - ieee80211_regdomain.c
> - sys/kern/kern_pledge.c
>
> Cheers
> Adam
>
I appreciate the effort here but I think this can be easily solved with just a
4kbit bitmap, with vmd setting '1's in the corresponding bit location for the
ports it wants passed up to itself. Port numbers > 4k could be always passed.
(You could even use a 8k or 16kbit bitmap for further improvement at the expense
of a bit more data being sent over the create IOCTL).
That way, the calculation as to whether or not to forward the request is just
a couple bit shifts and an array lookup, not a more complex bsearch.
There is precedence for this in vmm - the MSR bitmaps operate the same way
as does the in-use VPID bitmap.
Overall this seems like quite a bit of code for a hotpath like this.
-ml
> Index: sys/arch/amd64/amd64/vmm.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/amd64/vmm.c,v
> retrieving revision 1.221
> diff -u -p -u -p -r1.221 vmm.c
> --- sys/arch/amd64/amd64/vmm.c 7 Oct 2018 22:43:06 -0000 1.221
> +++ sys/arch/amd64/amd64/vmm.c 23 Oct 2018 11:37:56 -0000
> @@ -114,6 +114,7 @@ int vmmioctl(dev_t, u_long, caddr_t, int
> int vmmclose(dev_t, int, int, struct proc *);
> int vmm_start(void);
> int vmm_stop(void);
> +int vmm_compare_vei_port(const void *a, const void *b);
> size_t vm_create_check_mem_ranges(struct vm_create_params *);
> int vm_create(struct vm_create_params *, struct proc *);
> int vm_run(struct vm_run_params *);
> @@ -1116,6 +1117,10 @@ vm_create(struct vm_create_params *vcp,
> return (ret);
> }
> rw_enter_write(&vm->vm_vcpu_lock);
> +
> + vcpu->vc_nportranges = vcp->vcp_nportranges;
> + memcpy(vcpu->vc_portranges, vcp->vcp_portranges,
> + vcpu->vc_nportranges * sizeof(vcp->vcp_portranges[0]));
> vcpu->vc_id = vm->vm_vcpu_ct;
> vm->vm_vcpu_ct++;
> SLIST_INSERT_HEAD(&vm->vm_vcpu_list, vcpu, vc_vcpu_link);
> @@ -5059,6 +5064,38 @@ vmm_get_guest_cpu_mode(struct vcpu *vcpu
> }
>
> /*
> + * XXX this should be consolidated in the kernel
> + * see
> + * - ieee80211_regdomain.c
> + * - sys/kern/kern_pledge.c
> + */
> +#ifndef bsearch
> +const void *vmm_bsearch(const void *, const void *, size_t, size_t,
> + int (*)(const void *, const void *));
> +
> +const void *
> +vmm_bsearch(const void *key, const void *base0, size_t nmemb, size_t size,
> + int (*compar)(const void *, const void *))
> +{
> + const char *base = base0;
> + int lim, cmp;
> + const void *p;
> +
> + for (lim = nmemb; lim != 0; lim >>= 1) {
> + p = base + (lim >> 1) * size;
> + cmp = (*compar)(key, p);
> + if (cmp == 0)
> + return ((const void *)p);
> + if (cmp > 0) { /* key > p: move right */
> + base = (const char *)p + size;
> + lim--;
> + } /* else move left */
> + }
> + return (NULL);
> +}
> +#endif
> +
> +/*
> * svm_handle_inout
> *
> * Exit handler for IN/OUT instructions.
> @@ -5113,28 +5150,15 @@ svm_handle_inout(struct vcpu *vcpu)
> vcpu->vc_gueststate.vg_rip += insn_length;
>
> /*
> - * The following ports usually belong to devices owned by vmd.
> + * The ports specified by vc_portranges belong to devices owned by vmd.
> * Return EAGAIN to signal help needed from userspace (vmd).
> * Return 0 to indicate we don't care about this port.
> - *
> - * XXX something better than a hardcoded list here, maybe
> - * configure via vmd via the device list in vm create params?
> */
> - switch (vcpu->vc_exit.vei.vei_port) {
> - case IO_ICU1 ... IO_ICU1 + 1:
> - case 0x40 ... 0x43:
> - case PCKBC_AUX:
> - case IO_RTC ... IO_RTC + 1:
> - case IO_ICU2 ... IO_ICU2 + 1:
> - case 0x3f8 ... 0x3ff:
> - case ELCR0 ... ELCR1:
> - case 0x500 ... 0x50f:
> - case 0xcf8:
> - case 0xcfc ... 0xcff:
> - case VMM_PCI_IO_BAR_BASE ... VMM_PCI_IO_BAR_END:
> + if (vmm_bsearch(&vcpu->vc_exit.vei.vei_port, vcpu->vc_portranges,
> + vcpu->vc_nportranges, sizeof(struct vm_port_range),
> + vmm_compare_vei_port)) {
> ret = EAGAIN;
> - break;
> - default:
> + } else {
> /* Read from unsupported ports returns FFs */
> if (vcpu->vc_exit.vei.vei_dir == 1) {
> switch(vcpu->vc_exit.vei.vei_size) {
> @@ -5206,28 +5230,15 @@ vmx_handle_inout(struct vcpu *vcpu)
> vcpu->vc_gueststate.vg_rip += insn_length;
>
> /*
> - * The following ports usually belong to devices owned by vmd.
> + * The ports specified by vc_portranges belong to devices owned by vmd.
> * Return EAGAIN to signal help needed from userspace (vmd).
> * Return 0 to indicate we don't care about this port.
> - *
> - * XXX something better than a hardcoded list here, maybe
> - * configure via vmd via the device list in vm create params?
> */
> - switch (vcpu->vc_exit.vei.vei_port) {
> - case IO_ICU1 ... IO_ICU1 + 1:
> - case 0x40 ... 0x43:
> - case PCKBC_AUX:
> - case IO_RTC ... IO_RTC + 1:
> - case IO_ICU2 ... IO_ICU2 + 1:
> - case 0x3f8 ... 0x3ff:
> - case ELCR0 ... ELCR1:
> - case 0xcf8:
> - case 0xcfc ... 0xcff:
> - case 0x500 ... 0x50f:
> - case VMM_PCI_IO_BAR_BASE ... VMM_PCI_IO_BAR_END:
> + if (vmm_bsearch(&vcpu->vc_exit.vei.vei_port, vcpu->vc_portranges,
> + vcpu->vc_nportranges, sizeof(struct vm_port_range),
> + vmm_compare_vei_port)) {
> ret = EAGAIN;
> - break;
> - default:
> + } else {
> /* Read from unsupported ports returns FFs */
> if (vcpu->vc_exit.vei.vei_dir == VEI_DIR_IN) {
> if (vcpu->vc_exit.vei.vei_size == 4)
> @@ -5243,6 +5254,19 @@ vmx_handle_inout(struct vcpu *vcpu)
> return (ret);
> }
>
> +int
> +vmm_compare_vei_port(const void *a, const void *b)
> +{
> + const uint16_t vei_port = *(uint16_t*)a;
> + const struct vm_port_range port_range = *(struct vm_port_range*)b;
> +
> + if(vei_port < port_range.vpr_lower_port)
> + return -1;
> + if(vei_port > port_range.vpr_upper_port)
> + return 1;
> + else
> + return 0;
> +}
> /*
> * vmx_load_pdptes
> *
> Index: sys/arch/amd64/include/vmmvar.h
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/include/vmmvar.h,v
> retrieving revision 1.59
> diff -u -p -u -p -r1.59 vmmvar.h
> --- sys/arch/amd64/include/vmmvar.h 20 Sep 2018 14:32:59 -0000 1.59
> +++ sys/arch/amd64/include/vmmvar.h 23 Oct 2018 11:37:57 -0000
> @@ -32,6 +32,7 @@
> #define VMM_MAX_VCPUS_PER_VM 64
> #define VMM_MAX_VM_MEM_SIZE 32768
> #define VMM_MAX_NICS_PER_VM 4
> +#define VMM_MAX_PORT_RANGES 16
>
> #define VMM_PCI_MMIO_BAR_BASE 0xF0000000ULL
> #define VMM_PCI_MMIO_BAR_END 0xFFFFFFFFULL
> @@ -430,6 +431,11 @@ struct vm_mem_range {
> size_t vmr_size;
> };
>
> +struct vm_port_range {
> + uint16_t vpr_lower_port;
> + uint16_t vpr_upper_port;
> +};
> +
> /*
> * struct vm_exit
> *
> @@ -450,12 +456,14 @@ struct vm_create_params {
> size_t vcp_ncpus;
> size_t vcp_ndisks;
> size_t vcp_nnics;
> + size_t vcp_nportranges;
> struct vm_mem_range vcp_memranges[VMM_MAX_MEM_RANGES];
> char
> vcp_disks[VMM_MAX_DISKS_PER_VM][VMM_MAX_PATH_DISK];
> char vcp_cdrom[VMM_MAX_PATH_CDROM];
> char vcp_name[VMM_MAX_NAME_LEN];
> char vcp_kernel[VMM_MAX_KERNEL_PATH];
> uint8_t vcp_macs[VMM_MAX_NICS_PER_VM][6];
> + struct vm_port_range vcp_portranges[VMM_MAX_PORT_RANGES];
>
> /* Output parameter from VMM_IOC_CREATE */
> uint32_t vcp_id;
> @@ -840,6 +848,9 @@ struct vcpu {
> /* MSR bitmap address */
> vaddr_t vc_msr_bitmap_va;
> uint64_t vc_msr_bitmap_pa;
> +
> + size_t vc_nportranges;
> + struct vm_port_range vc_portranges[VMM_MAX_PORT_RANGES];
>
> struct vm *vc_parent;
> uint32_t vc_id;
> Index: usr.sbin/vmd/vmd.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/vmd/vmd.c,v
> retrieving revision 1.104
> diff -u -p -u -p -r1.104 vmd.c
> --- usr.sbin/vmd/vmd.c 15 Oct 2018 10:35:41 -0000 1.104
> +++ usr.sbin/vmd/vmd.c 23 Oct 2018 11:38:02 -0000
> @@ -41,6 +41,8 @@
> #include <pwd.h>
> #include <grp.h>
>
> +#include <dev/isa/isareg.h>
> +
> #include <machine/specialreg.h>
> #include <machine/vmmvar.h>
>
> @@ -72,6 +74,30 @@ static struct privsep_proc procs[] = {
> { "vmm", PROC_VMM, vmd_dispatch_vmm, vmm, vmm_shutdown },
> };
>
> +/* The following ports belong to devices owned by vmd. */
> +static struct vm_port_range port_ranges[] = {
> + {.vpr_lower_port = IO_ICU1,
> + .vpr_upper_port = IO_ICU1 + 1},
> + {.vpr_lower_port = 0x40,
> + .vpr_upper_port = 0x43},
> + {.vpr_lower_port = PCKBC_AUX,
> + .vpr_upper_port = PCKBC_AUX},
> + {.vpr_lower_port = IO_RTC,
> + .vpr_upper_port = IO_RTC + 1},
> + {.vpr_lower_port = IO_ICU2,
> + .vpr_upper_port = IO_ICU2 + 1},
> + {.vpr_lower_port = 0x3f8,
> + .vpr_upper_port = 0x3ff},
> + {.vpr_lower_port = ELCR0,
> + .vpr_upper_port = ELCR1},
> + {.vpr_lower_port = 0xcf8,
> + .vpr_upper_port = 0xcf8},
> + {.vpr_lower_port = 0xcfc,
> + .vpr_upper_port = 0xcff},
> + {.vpr_lower_port = VMM_PCI_IO_BAR_BASE,
> + .vpr_upper_port = VMM_PCI_IO_BAR_END}
> +};
> +
> /* For the privileged process */
> static struct privsep_proc *proc_priv = &procs[0];
> static struct passwd proc_privpw;
> @@ -1219,6 +1245,7 @@ vm_register(struct privsep *ps, struct v
> vcp->vcp_ncpus = 1;
> if (vcp->vcp_memranges[0].vmr_size == 0)
> vcp->vcp_memranges[0].vmr_size = VM_DEFAULT_MEMORY;
> + vcp->vcp_nportranges = sizeof(port_ranges)/sizeof(port_ranges[0]);
> if (vcp->vcp_ncpus > VMM_MAX_VCPUS_PER_VM) {
> log_warnx("invalid number of CPUs");
> goto fail;
> @@ -1239,6 +1266,9 @@ vm_register(struct privsep *ps, struct v
> *vcp->vcp_name == '_') {
> log_warnx("invalid VM name");
> goto fail;
> + } else if(vcp->vcp_nportranges > VMM_MAX_PORT_RANGES) {
> + log_warnx("invalid number of port ranges");
> + goto fail;
> } else {
> for (s = vcp->vcp_name; *s != '\0'; ++s) {
> if (!(isalnum(*s) || *s == '.' || *s == '-' ||
> @@ -1248,6 +1278,8 @@ vm_register(struct privsep *ps, struct v
> }
> }
> }
> +
> + memcpy(&vcp->vcp_portranges, &port_ranges, sizeof(port_ranges));
>
> /* track active users */
> if (uid != 0 && env->vmd_users != NULL &&
>