On Sat, Sep 11, 2021 at 01:44:33PM -0400, Dave Voutila wrote:
> Syzbot recently discovered that since we don't have any bounds in place
> for number of vms or vcpus it's possible to completely exhuast kernel
> memory or at least put the system in a state where malloc(9) or
> km_alloc(9) fail in systems (e.g. DRM, unveil, etc.) resulting in
> panics. Actually, it first discovered some lock ordering issues, but
> once those were fixed this issue surfaced via the reproducer [1].
>
> I chose 512 as a conservative bound based on the idea that vcpu's have a
> few wired pages of memory each for various VMX/SVM things like VMCS/VMCB
> structures.
>
> Given we also wire guest memory on a page fault and only support 1 vcpu
> per guest currently, it's highly unlikely someone is successfully
> running 512 guests. Once we finish fixing the tlb issues forcing us to
> wire or implement SMP, we can revisit this number.
>
> I checked with openbsd.amsterdam and this is well over their current
> densities. (If anyone *IS* somehow running > 512 guests as of this
> moment, please speak up.)
>
> ok?
>
> [1] https://syzkaller.appspot.com/text?tag=ReproC&x=11f507de30
>
ok mlarkin
> Index: sys/arch/amd64/amd64/vmm.c
> ===
> RCS file: /cvs/src/sys/arch/amd64/amd64/vmm.c,v
> retrieving revision 1.292
> diff -u -p -r1.292 vmm.c
> --- sys/arch/amd64/amd64/vmm.c5 Sep 2021 16:36:34 - 1.292
> +++ sys/arch/amd64/amd64/vmm.c11 Sep 2021 17:36:28 -
> @@ -99,6 +99,9 @@ struct vmm_softc {
>
> int mode;
>
> + size_t vcpu_ct;
> + size_t vcpu_max;
> +
> struct rwlock vm_lock;
> size_t vm_ct; /* number of in-memory VMs */
> size_t vm_idx; /* next unique VM index */
> @@ -368,6 +371,8 @@ vmm_attach(struct device *parent, struct
> sc->nr_svm_cpus = 0;
> sc->nr_rvi_cpus = 0;
> sc->nr_ept_cpus = 0;
> + sc->vcpu_ct = 0;
> + sc->vcpu_max = VMM_MAX_VCPUS;
> sc->vm_ct = 0;
> sc->vm_idx = 0;
>
> @@ -1498,6 +1503,15 @@ vm_create(struct vm_create_params *vcp,
> if (vcp->vcp_ncpus != 1)
> return (EINVAL);
>
> + rw_enter_write(&vmm_softc->vm_lock);
> + if (vmm_softc->vcpu_ct + vcp->vcp_ncpus > vmm_softc->vcpu_max) {
> + printf("%s: maximum vcpus (%lu) reached\n", __func__,
> + vmm_softc->vcpu_max);
> + rw_exit_write(&vmm_softc->vm_lock);
> + return (ENOMEM);
> + }
> + vmm_softc->vcpu_ct += vcp->vcp_ncpus;
> +
> vm = pool_get(&vm_pool, PR_WAITOK | PR_ZERO);
> SLIST_INIT(&vm->vm_vcpu_list);
> rw_init(&vm->vm_vcpu_lock, "vcpu_list");
> @@ -1509,8 +1523,6 @@ vm_create(struct vm_create_params *vcp,
> vm->vm_memory_size = memsize;
> strncpy(vm->vm_name, vcp->vcp_name, VMM_MAX_NAME_LEN - 1);
>
> - rw_enter_write(&vmm_softc->vm_lock);
> -
> if (vm_impl_init(vm, p)) {
> printf("failed to init arch-specific features for vm %p\n", vm);
> vm_teardown(vm);
> @@ -3784,6 +3796,7 @@ vm_teardown(struct vm *vm)
> SLIST_REMOVE(&vm->vm_vcpu_list, vcpu, vcpu, vc_vcpu_link);
> vcpu_deinit(vcpu);
> pool_put(&vcpu_pool, vcpu);
> + vmm_softc->vcpu_ct--;
> }
>
> vm_impl_deinit(vm);
> Index: sys/arch/amd64/include/vmmvar.h
> ===
> RCS file: /cvs/src/sys/arch/amd64/include/vmmvar.h,v
> retrieving revision 1.73
> diff -u -p -r1.73 vmmvar.h
> --- sys/arch/amd64/include/vmmvar.h 31 Aug 2021 17:40:59 - 1.73
> +++ sys/arch/amd64/include/vmmvar.h 11 Sep 2021 17:36:28 -
> @@ -29,6 +29,7 @@
> #define VMM_MAX_PATH_CDROM 128
> #define VMM_MAX_NAME_LEN 64
> #define VMM_MAX_KERNEL_PATH 128
> +#define VMM_MAX_VCPUS512
> #define VMM_MAX_VCPUS_PER_VM 64
> #define VMM_MAX_VM_MEM_SIZE 32768
> #define VMM_MAX_NICS_PER_VM 4
>