On Tue, Oct 23, 2018 at 09:53:48PM +0000, Adam Steen wrote:
> Hi tech
> 
> The following diff allows vmd to specify which ports it can handle
> or fix "XXX something better than a hardcoded list here, maybe configure via
> vmd via the device list in vm create params?"
> 
> There are currently two implementation of bsearch in the kernel and this patch
> would add a third, I think these should be consolidated, but i didn't know
> where to put the new function.
> the implementations are in:
>   - ieee80211_regdomain.c
>   - sys/kern/kern_pledge.c
> 
> Cheers
> Adam
> 

I appreciate the effort here but I think this can be easily solved with just a
4kbit bitmap, with vmd setting '1's in the corresponding bit location for the
ports it wants passed up to itself. Port numbers > 4k could be always passed.

(You could even use a 8k or 16kbit bitmap for further improvement at the expense
of a bit more data being sent over the create IOCTL).

That way, the calculation as to whether or not to forward the request is just
a couple bit shifts and an array lookup, not a more complex bsearch.

There is precedence for this in vmm - the MSR bitmaps operate the same way
as does the in-use VPID bitmap.

Overall this seems like quite a bit of code for a hotpath like this.

-ml

> Index: sys/arch/amd64/amd64/vmm.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/amd64/vmm.c,v
> retrieving revision 1.221
> diff -u -p -u -p -r1.221 vmm.c
> --- sys/arch/amd64/amd64/vmm.c        7 Oct 2018 22:43:06 -0000       1.221
> +++ sys/arch/amd64/amd64/vmm.c        23 Oct 2018 11:37:56 -0000
> @@ -114,6 +114,7 @@ int vmmioctl(dev_t, u_long, caddr_t, int
>  int vmmclose(dev_t, int, int, struct proc *);
>  int vmm_start(void);
>  int vmm_stop(void);
> +int vmm_compare_vei_port(const void *a, const void *b);
>  size_t vm_create_check_mem_ranges(struct vm_create_params *);
>  int vm_create(struct vm_create_params *, struct proc *);
>  int vm_run(struct vm_run_params *);
> @@ -1116,6 +1117,10 @@ vm_create(struct vm_create_params *vcp,
>                       return (ret);
>               }
>               rw_enter_write(&vm->vm_vcpu_lock);
> +
> +             vcpu->vc_nportranges = vcp->vcp_nportranges;
> +             memcpy(vcpu->vc_portranges, vcp->vcp_portranges,
> +                 vcpu->vc_nportranges * sizeof(vcp->vcp_portranges[0]));
>               vcpu->vc_id = vm->vm_vcpu_ct;
>               vm->vm_vcpu_ct++;
>               SLIST_INSERT_HEAD(&vm->vm_vcpu_list, vcpu, vc_vcpu_link);
> @@ -5059,6 +5064,38 @@ vmm_get_guest_cpu_mode(struct vcpu *vcpu
>  }
> 
>  /*
> + * XXX this should be consolidated in the kernel
> + * see
> + *  - ieee80211_regdomain.c
> + *  - sys/kern/kern_pledge.c
> + */
> +#ifndef bsearch
> +const void *vmm_bsearch(const void *, const void *, size_t, size_t,
> +    int (*)(const void *, const void *));
> +
> +const void *
> +vmm_bsearch(const void *key, const void *base0, size_t nmemb, size_t size,
> +    int (*compar)(const void *, const void *))
> +{
> +     const char *base = base0;
> +     int lim, cmp;
> +     const void *p;
> +
> +     for (lim = nmemb; lim != 0; lim >>= 1) {
> +             p = base + (lim >> 1) * size;
> +             cmp = (*compar)(key, p);
> +             if (cmp == 0)
> +                     return ((const void *)p);
> +             if (cmp > 0) {  /* key > p: move right */
> +                     base = (const char *)p + size;
> +                     lim--;
> +             } /* else move left */
> +     }
> +     return (NULL);
> +}
> +#endif
> +
> +/*
>   * svm_handle_inout
>   *
>   * Exit handler for IN/OUT instructions.
> @@ -5113,28 +5150,15 @@ svm_handle_inout(struct vcpu *vcpu)
>       vcpu->vc_gueststate.vg_rip += insn_length;
> 
>       /*
> -      * The following ports usually belong to devices owned by vmd.
> +      * The ports specified by vc_portranges belong to devices owned by vmd.
>        * Return EAGAIN to signal help needed from userspace (vmd).
>        * Return 0 to indicate we don't care about this port.
> -      *
> -      * XXX something better than a hardcoded list here, maybe
> -      * configure via vmd via the device list in vm create params?
>        */
> -     switch (vcpu->vc_exit.vei.vei_port) {
> -     case IO_ICU1 ... IO_ICU1 + 1:
> -     case 0x40 ... 0x43:
> -     case PCKBC_AUX:
> -     case IO_RTC ... IO_RTC + 1:
> -     case IO_ICU2 ... IO_ICU2 + 1:
> -     case 0x3f8 ... 0x3ff:
> -     case ELCR0 ... ELCR1:
> -     case 0x500 ... 0x50f:
> -     case 0xcf8:
> -     case 0xcfc ... 0xcff:
> -     case VMM_PCI_IO_BAR_BASE ... VMM_PCI_IO_BAR_END:
> +     if (vmm_bsearch(&vcpu->vc_exit.vei.vei_port, vcpu->vc_portranges,
> +         vcpu->vc_nportranges, sizeof(struct vm_port_range),
> +         vmm_compare_vei_port)) {
>               ret = EAGAIN;
> -             break;
> -     default:
> +     } else {
>               /* Read from unsupported ports returns FFs */
>               if (vcpu->vc_exit.vei.vei_dir == 1) {
>                       switch(vcpu->vc_exit.vei.vei_size) {
> @@ -5206,28 +5230,15 @@ vmx_handle_inout(struct vcpu *vcpu)
>       vcpu->vc_gueststate.vg_rip += insn_length;
> 
>       /*
> -      * The following ports usually belong to devices owned by vmd.
> +      * The ports specified by vc_portranges belong to devices owned by vmd.
>        * Return EAGAIN to signal help needed from userspace (vmd).
>        * Return 0 to indicate we don't care about this port.
> -      *
> -      * XXX something better than a hardcoded list here, maybe
> -      * configure via vmd via the device list in vm create params?
>        */
> -     switch (vcpu->vc_exit.vei.vei_port) {
> -     case IO_ICU1 ... IO_ICU1 + 1:
> -     case 0x40 ... 0x43:
> -     case PCKBC_AUX:
> -     case IO_RTC ... IO_RTC + 1:
> -     case IO_ICU2 ... IO_ICU2 + 1:
> -     case 0x3f8 ... 0x3ff:
> -     case ELCR0 ... ELCR1:
> -     case 0xcf8:
> -     case 0xcfc ... 0xcff:
> -     case 0x500 ... 0x50f:
> -     case VMM_PCI_IO_BAR_BASE ... VMM_PCI_IO_BAR_END:
> +     if (vmm_bsearch(&vcpu->vc_exit.vei.vei_port, vcpu->vc_portranges,
> +         vcpu->vc_nportranges, sizeof(struct vm_port_range),
> +         vmm_compare_vei_port)) {
>               ret = EAGAIN;
> -             break;
> -     default:
> +     } else {
>               /* Read from unsupported ports returns FFs */
>               if (vcpu->vc_exit.vei.vei_dir == VEI_DIR_IN) {
>                       if (vcpu->vc_exit.vei.vei_size == 4)
> @@ -5243,6 +5254,19 @@ vmx_handle_inout(struct vcpu *vcpu)
>       return (ret);
>  }
> 
> +int
> +vmm_compare_vei_port(const void *a, const void *b)
> +{
> +     const uint16_t vei_port = *(uint16_t*)a;
> +     const struct vm_port_range port_range = *(struct vm_port_range*)b;
> +
> +     if(vei_port < port_range.vpr_lower_port)
> +             return -1;
> +     if(vei_port > port_range.vpr_upper_port)
> +             return 1;
> +     else
> +             return 0;
> +}
>  /*
>   * vmx_load_pdptes
>   *
> Index: sys/arch/amd64/include/vmmvar.h
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/include/vmmvar.h,v
> retrieving revision 1.59
> diff -u -p -u -p -r1.59 vmmvar.h
> --- sys/arch/amd64/include/vmmvar.h   20 Sep 2018 14:32:59 -0000      1.59
> +++ sys/arch/amd64/include/vmmvar.h   23 Oct 2018 11:37:57 -0000
> @@ -32,6 +32,7 @@
>  #define VMM_MAX_VCPUS_PER_VM 64
>  #define VMM_MAX_VM_MEM_SIZE  32768
>  #define VMM_MAX_NICS_PER_VM  4
> +#define VMM_MAX_PORT_RANGES  16
> 
>  #define VMM_PCI_MMIO_BAR_BASE        0xF0000000ULL
>  #define VMM_PCI_MMIO_BAR_END 0xFFFFFFFFULL
> @@ -430,6 +431,11 @@ struct vm_mem_range {
>       size_t  vmr_size;
>  };
> 
> +struct vm_port_range {
> +     uint16_t vpr_lower_port;
> +     uint16_t vpr_upper_port;
> +};
> +
>  /*
>   * struct vm_exit
>   *
> @@ -450,12 +456,14 @@ struct vm_create_params {
>       size_t                  vcp_ncpus;
>       size_t                  vcp_ndisks;
>       size_t                  vcp_nnics;
> +     size_t                  vcp_nportranges;
>       struct vm_mem_range     vcp_memranges[VMM_MAX_MEM_RANGES];
>       char                    
> vcp_disks[VMM_MAX_DISKS_PER_VM][VMM_MAX_PATH_DISK];
>       char                    vcp_cdrom[VMM_MAX_PATH_CDROM];
>       char                    vcp_name[VMM_MAX_NAME_LEN];
>       char                    vcp_kernel[VMM_MAX_KERNEL_PATH];
>       uint8_t                 vcp_macs[VMM_MAX_NICS_PER_VM][6];
> +     struct vm_port_range    vcp_portranges[VMM_MAX_PORT_RANGES];
> 
>       /* Output parameter from VMM_IOC_CREATE */
>       uint32_t        vcp_id;
> @@ -840,6 +848,9 @@ struct vcpu {
>       /* MSR bitmap address */
>       vaddr_t vc_msr_bitmap_va;
>       uint64_t vc_msr_bitmap_pa;
> +
> +     size_t                  vc_nportranges;
> +     struct vm_port_range    vc_portranges[VMM_MAX_PORT_RANGES];
> 
>       struct vm *vc_parent;
>       uint32_t vc_id;
> Index: usr.sbin/vmd/vmd.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/vmd/vmd.c,v
> retrieving revision 1.104
> diff -u -p -u -p -r1.104 vmd.c
> --- usr.sbin/vmd/vmd.c        15 Oct 2018 10:35:41 -0000      1.104
> +++ usr.sbin/vmd/vmd.c        23 Oct 2018 11:38:02 -0000
> @@ -41,6 +41,8 @@
>  #include <pwd.h>
>  #include <grp.h>
> 
> +#include <dev/isa/isareg.h>
> +
>  #include <machine/specialreg.h>
>  #include <machine/vmmvar.h>
> 
> @@ -72,6 +74,30 @@ static struct privsep_proc procs[] = {
>       { "vmm",        PROC_VMM,       vmd_dispatch_vmm, vmm, vmm_shutdown },
>  };
> 
> +/* The following ports belong to devices owned by vmd. */
> +static struct vm_port_range port_ranges[] = {
> +     {.vpr_lower_port = IO_ICU1,
> +      .vpr_upper_port = IO_ICU1 + 1},
> +     {.vpr_lower_port = 0x40,
> +      .vpr_upper_port = 0x43},
> +     {.vpr_lower_port = PCKBC_AUX,
> +      .vpr_upper_port = PCKBC_AUX},
> +     {.vpr_lower_port = IO_RTC,
> +      .vpr_upper_port = IO_RTC + 1},
> +     {.vpr_lower_port = IO_ICU2,
> +      .vpr_upper_port = IO_ICU2 + 1},
> +     {.vpr_lower_port = 0x3f8,
> +      .vpr_upper_port = 0x3ff},
> +     {.vpr_lower_port = ELCR0,
> +      .vpr_upper_port = ELCR1},
> +     {.vpr_lower_port = 0xcf8,
> +      .vpr_upper_port = 0xcf8},
> +     {.vpr_lower_port = 0xcfc,
> +      .vpr_upper_port = 0xcff},
> +     {.vpr_lower_port = VMM_PCI_IO_BAR_BASE,
> +      .vpr_upper_port = VMM_PCI_IO_BAR_END}
> +};
> +
>  /* For the privileged process */
>  static struct privsep_proc *proc_priv = &procs[0];
>  static struct passwd proc_privpw;
> @@ -1219,6 +1245,7 @@ vm_register(struct privsep *ps, struct v
>               vcp->vcp_ncpus = 1;
>       if (vcp->vcp_memranges[0].vmr_size == 0)
>               vcp->vcp_memranges[0].vmr_size = VM_DEFAULT_MEMORY;
> +     vcp->vcp_nportranges = sizeof(port_ranges)/sizeof(port_ranges[0]);
>       if (vcp->vcp_ncpus > VMM_MAX_VCPUS_PER_VM) {
>               log_warnx("invalid number of CPUs");
>               goto fail;
> @@ -1239,6 +1266,9 @@ vm_register(struct privsep *ps, struct v
>           *vcp->vcp_name == '_') {
>               log_warnx("invalid VM name");
>               goto fail;
> +     } else if(vcp->vcp_nportranges > VMM_MAX_PORT_RANGES) {
> +             log_warnx("invalid number of port ranges");
> +             goto fail;
>       } else {
>               for (s = vcp->vcp_name; *s != '\0'; ++s) {
>                       if (!(isalnum(*s) || *s == '.' || *s == '-' ||
> @@ -1248,6 +1278,8 @@ vm_register(struct privsep *ps, struct v
>                       }
>               }
>       }
> +
> +     memcpy(&vcp->vcp_portranges, &port_ranges, sizeof(port_ranges));
> 
>       /* track active users */
>       if (uid != 0 && env->vmd_users != NULL &&
> 

Reply via email to