Re: only open /dev/vmm once in vmd(8)
Dave Voutila writes: > During h2k22 there was some discussion around how vmd(8) manages vms and > the vmm(4) device's role. While looking into something related, I found > vmd opens /dev/vmm in each subprocess during the initial fork+execve > dance. > > The only vmd process that needs /dev/vmm is the vmm process. > > The diff below changes it so that *only* the parent process opens > /dev/vmm and then uses fd passing to send it to the vmm process once > fork+exec'd. This adds a new imsg type specific to this handoff. > > The other processes don't end up with the vmm pledge, so there's no > reason they need open file descriptors to the vmm device. > > OK? ping... Haven't seen any negative reports and I've been working on other things atop this diff for a few weeks now. Any OKs or objections? > > > diff refs/heads/master refs/heads/vmd-vmm-open > commit - adc9c11636d08981539860c611938338c714d31e > commit + 1bb17c1c2d6cd28c231fa2eb6f8494e5498bb1a3 > blob - bd0d8580ffc25d905e5f20c1de5044397ddf313d > blob + 41e4b7b5852c7523a7b1c0e3a73591638a9c9e56 > --- usr.sbin/vmd/vmd.c > +++ usr.sbin/vmd/vmd.c > @@ -847,8 +847,8 @@ main(int argc, char **argv) > proc_priv->p_pw = _privpw; /* initialized to all 0 */ > proc_priv->p_chroot = ps->ps_pw->pw_dir; /* from VMD_USER */ > > - /* Open /dev/vmm */ > - if (env->vmd_noaction == 0) { > + /* Open /dev/vmm early. */ > + if (env->vmd_noaction == 0 && proc_id == PROC_PARENT) { > env->vmd_fd = open(VMM_NODE, O_RDWR); > if (env->vmd_fd == -1) > fatal("%s", VMM_NODE); > @@ -971,6 +971,10 @@ vmd_configure(void) > exit(0); > } > > + /* Send VMM device fd to vmm proc. */ > + proc_compose_imsg(>vmd_ps, PROC_VMM, -1, > + IMSG_VMDOP_RECEIVE_VMM_FD, -1, env->vmd_fd, NULL, 0); > + > /* Send shared global configuration to all children */ > if (config_setconfig(env) == -1) > return (-1); > blob - c27d03df7336a2c8ede22ec0d83b74d327fb3244 > blob + 2ca4aa336bf68b0d8bf8183e92c9d0ef99d5411e > --- usr.sbin/vmd/vmd.h > +++ usr.sbin/vmd/vmd.h > @@ -108,6 +108,7 @@ enum imsg_type { > IMSG_VMDOP_GET_INFO_VM_DATA, > IMSG_VMDOP_GET_INFO_VM_END_DATA, > IMSG_VMDOP_LOAD, > + IMSG_VMDOP_RECEIVE_VMM_FD, > IMSG_VMDOP_RELOAD, > IMSG_VMDOP_PRIV_IFDESCR, > IMSG_VMDOP_PRIV_IFADD, > blob - 6c2bdbd12a3ee11aa88055200e11e0f2a0ff667f > blob + 2f82a241c1c1672770529ef98f9b748714512704 > --- usr.sbin/vmd/vmm.c > +++ usr.sbin/vmd/vmm.c > @@ -94,9 +94,6 @@ vmm_run(struct privsep *ps, struct privsep_proc *p, vo >*/ > if (pledge("stdio vmm sendfd recvfd proc", NULL) == -1) > fatal("pledge"); > - > - /* Get and terminate all running VMs */ > - get_info_vm(ps, NULL, 1); > } > > int > @@ -315,6 +312,14 @@ vmm_dispatch_parent(int fd, struct privsep_proc *p, st > imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid, > imsg->fd, , sizeof(var)); > break; > + case IMSG_VMDOP_RECEIVE_VMM_FD: > + if (env->vmd_fd > -1) > + fatalx("already received vmm fd"); > + env->vmd_fd = imsg->fd; > + > + /* Get and terminate all running VMs */ > + get_info_vm(ps, NULL, 1); > + break; > default: > return (-1); > }
Re: only open /dev/vmm once in vmd(8)
Hi Dave, Applied the patch on top of the previous two you provided and all looks good. Running four proper VMs (installed 7.2, with different amount of memory allocated, one of them with rpki-client) and booted ~40 with just bsd.rd. Some log messages I am seeing, which I didn't see/notice before. Let me know if there is something specific I need to look out for. Dec 26 16:54:13 current /bsd: vm_impl_init_vmx: created vm_map @ 0xfdd1a31ff998 Dec 26 16:54:13 current /bsd: Guest EPTP = 0x509fc7501e Dec 26 16:54:13 current /bsd: vm_impl_init_vmx: created vm_map @ 0xfdd1a31ff448 Dec 26 16:54:13 current /bsd: Guest EPTP = 0x509fb5001e Dec 26 16:54:13 current /bsd: vm_impl_init_vmx: created vm_map @ 0xfdd1a31ff008 Dec 26 16:54:13 current /bsd: Guest EPTP = 0x509f9b401e Dec 26 16:54:13 current /bsd: vm_impl_init_vmx: created vm_map @ 0xfdd1a31ff228 Dec 26 16:54:13 current /bsd: Guest EPTP = 0x509f7b001e Dec 26 16:54:13 current /bsd: vm_impl_init_vmx: created vm_map @ 0xfdd1a31ff778 Dec 26 16:54:13 current /bsd: Guest EPTP = 0x509f51a01e Dec 26 16:54:13 current /bsd: vm_impl_init_vmx: created vm_map @ 0xfdd1a31ffaa8 Dec 26 16:54:13 current /bsd: Guest EPTP = 0x509f1c601e Dec 26 16:54:13 current /bsd: vm_impl_init_vmx: created vm_map @ 0xfdd1a31ffdd8 Dec 26 16:54:13 current /bsd: Guest EPTP = 0x509ee0901e Dec 26 16:54:13 current /bsd: vm_impl_init_vmx: created vm_map @ 0xfdd09e98fcd0 Dec 26 16:54:13 current /bsd: Guest EPTP = 0x509e98e01e Dec 26 16:54:13 current /bsd: vm_impl_init_vmx: created vm_map @ 0xfdd09e98f780 Dec 26 16:54:13 current /bsd: Guest EPTP = 0x509e46c01e Dec 26 16:54:13 current /bsd: vm_impl_init_vmx: created vm_map @ 0xfdd09e98f340 Dec 26 16:54:13 current /bsd: Guest EPTP = 0x509de2601e Dec 26 16:54:13 current /bsd: vmm_alloc_vpid: allocated VPID/ASID 25 Dec 26 16:54:13 current /bsd: vmm_handle_cpuid: function 0x0a (arch. perf mon) not supported Dec 26 16:54:13 current /bsd: vmx_handle_cr: mov to cr0 @ 100148a, data=0x80010031 Dec 26 16:54:13 current /bsd: vmm_alloc_vpid: allocated VPID/ASID 26 Dec 26 16:54:13 current /bsd: vmm_handle_cpuid: function 0x0a (arch. perf mon) not supported Dec 26 16:54:13 current /bsd: vmx_handle_cr: mov to cr0 @ 100148a, data=0x8001003 Dec 26 16:54:13 current /bsd: 1 Dec 26 16:54:13 current /bsd: vmm_alloc_vpid: allocated VPID Dec 26 16:54:13 current /bsd: /ASID 27 Dec 26 16:54:13 current /bsd: vmm_handle_cpuid: function 0x0a (arch. perf mon) not supported Dec 26 16:54:13 current /bsd: vmx_handle_cr: mov to cr0 @ 100148a, data=0x80010031 Dec 26 16:54:13 current /bsd: vmm_alloc_vpid: allocated VPID/ASID 28 Dec 26 16:54:13 current /bsd: vmm_handle_cpuid: function 0x0a (arch. perf mon) not supported Dec 26 16:54:13 current /bsd: vmx_handle_cr: mov to cr0 @ 100148a, data=0x80010031 Dec 26 16:54:13 current /bsd: vmm_alloc_vpid: allocated VPID/ASID 29 Dec 26 16:54:13 current /bsd: vmm_handle_cpuid: function 0x0a (arch. perf mon) not supported Dec 26 16:54:13 current /bsd: vmx_handle_cr: mov to cr0 @ 100148a, data=0x80010031 Dec 26 16:54:13 current /bsd: vmm_alloc_vpid: allocated VPID/ASID 30 Dec 26 16:54:13 current /bsd: vmm_alloc_vpid: allocated VPID/ASID 31 Dec 26 16:54:13 current /bsd: vmm_handle_cpuid: function 0x0a (arch. perf mon) not supported Dec 26 16:54:13 current /bsd: vmx_handle_cr: mov to cr0 @ 100148a, data=0x80010031 Dec 26 16:54:13 current /bsd: vmm_handle_cpuid: function 0x0a (arch. perf mon) not supported Dec 26 16:54:13 current /bsd: vmx_handle_cr: mov to cr0 @ 100148a, data=0x80010031 Dec 26 16:54:13 current /bsd: vmm_alloc_vpid: allocated VPID/ASID 32 Dec 26 16:54:13 current /bsd: vmm_handle_cpuid: function 0x0a (arch. perf mon) not supported Dec 26 16:54:13 current /bsd: vmx_handle_cr: mov to cr0 @ 100148a, data=0x80010031 Dec 26 16:54:13 current /bsd: vmm_alloc_vpid: allocated VPID/ASID 33 Dec 26 16:54:13 current /bsd: vmm_handle_cpuid: function 0x0a (arch. perf mon) not supported Dec 26 16:54:13 current /bsd: vmm_alloc_vpid: allocated VPID/ASID 34 Dec 26 16:54:13 current /bsd: vmx_handle_cr: mov to cr0 @ 100148a, data=0x80010031 Dec 26 16:54:13 current /bsd: vmm_handle_cpuid: function 0x0a (arch. perf mon) not supported Dec 26 16:54:13 current /bsd: vmx_handle_cr: mov to cr0 @ 100148a, data=0x80010031 Dec 26 16:54:14 current /bsd: vmm_handle_cpuid: unsupported rax=0x4100 Dec 26 16:54:14 current last message repeated 5 times Dec 26 16:54:14 current /bsd: vmm_handle_cpuid: function 0x06 (thermal/power mgt) not supported Dec 26 16:54:14 current last message repeated 2 times Mischa On 2022-12-25 16:57, Dave Voutila wrote: During h2k22 there was some discussion around how vmd(8) manages vms and the vmm(4) device's role. While looking into something related, I found vmd opens /dev/vmm in each subprocess during the initial fork+execve dance. The only vmd process that needs /dev/vmm is the vmm process. The diff below changes it so that *only* the
only open /dev/vmm once in vmd(8)
During h2k22 there was some discussion around how vmd(8) manages vms and the vmm(4) device's role. While looking into something related, I found vmd opens /dev/vmm in each subprocess during the initial fork+execve dance. The only vmd process that needs /dev/vmm is the vmm process. The diff below changes it so that *only* the parent process opens /dev/vmm and then uses fd passing to send it to the vmm process once fork+exec'd. This adds a new imsg type specific to this handoff. The other processes don't end up with the vmm pledge, so there's no reason they need open file descriptors to the vmm device. OK? diff refs/heads/master refs/heads/vmd-vmm-open commit - adc9c11636d08981539860c611938338c714d31e commit + 1bb17c1c2d6cd28c231fa2eb6f8494e5498bb1a3 blob - bd0d8580ffc25d905e5f20c1de5044397ddf313d blob + 41e4b7b5852c7523a7b1c0e3a73591638a9c9e56 --- usr.sbin/vmd/vmd.c +++ usr.sbin/vmd/vmd.c @@ -847,8 +847,8 @@ main(int argc, char **argv) proc_priv->p_pw = _privpw; /* initialized to all 0 */ proc_priv->p_chroot = ps->ps_pw->pw_dir; /* from VMD_USER */ - /* Open /dev/vmm */ - if (env->vmd_noaction == 0) { + /* Open /dev/vmm early. */ + if (env->vmd_noaction == 0 && proc_id == PROC_PARENT) { env->vmd_fd = open(VMM_NODE, O_RDWR); if (env->vmd_fd == -1) fatal("%s", VMM_NODE); @@ -971,6 +971,10 @@ vmd_configure(void) exit(0); } + /* Send VMM device fd to vmm proc. */ + proc_compose_imsg(>vmd_ps, PROC_VMM, -1, + IMSG_VMDOP_RECEIVE_VMM_FD, -1, env->vmd_fd, NULL, 0); + /* Send shared global configuration to all children */ if (config_setconfig(env) == -1) return (-1); blob - c27d03df7336a2c8ede22ec0d83b74d327fb3244 blob + 2ca4aa336bf68b0d8bf8183e92c9d0ef99d5411e --- usr.sbin/vmd/vmd.h +++ usr.sbin/vmd/vmd.h @@ -108,6 +108,7 @@ enum imsg_type { IMSG_VMDOP_GET_INFO_VM_DATA, IMSG_VMDOP_GET_INFO_VM_END_DATA, IMSG_VMDOP_LOAD, + IMSG_VMDOP_RECEIVE_VMM_FD, IMSG_VMDOP_RELOAD, IMSG_VMDOP_PRIV_IFDESCR, IMSG_VMDOP_PRIV_IFADD, blob - 6c2bdbd12a3ee11aa88055200e11e0f2a0ff667f blob + 2f82a241c1c1672770529ef98f9b748714512704 --- usr.sbin/vmd/vmm.c +++ usr.sbin/vmd/vmm.c @@ -94,9 +94,6 @@ vmm_run(struct privsep *ps, struct privsep_proc *p, vo */ if (pledge("stdio vmm sendfd recvfd proc", NULL) == -1) fatal("pledge"); - - /* Get and terminate all running VMs */ - get_info_vm(ps, NULL, 1); } int @@ -315,6 +312,14 @@ vmm_dispatch_parent(int fd, struct privsep_proc *p, st imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid, imsg->fd, , sizeof(var)); break; + case IMSG_VMDOP_RECEIVE_VMM_FD: + if (env->vmd_fd > -1) + fatalx("already received vmm fd"); + env->vmd_fd = imsg->fd; + + /* Get and terminate all running VMs */ + get_info_vm(ps, NULL, 1); + break; default: return (-1); }