Re: vmm(4): set global vcpu limit to 512

2021-09-11 Thread Mike Larkin
On Sat, Sep 11, 2021 at 01:44:33PM -0400, Dave Voutila wrote:
> Syzbot recently discovered that since we don't have any bounds in place
> for number of vms or vcpus it's possible to completely exhaust kernel
> memory or at least put the system in a state where malloc(9) or
> km_alloc(9) fail in systems (e.g. DRM, unveil, etc.) resulting in
> panics. Actually, it first discovered some lock ordering issues, but
> once those were fixed this issue surfaced via the reproducer [1].
>
> I chose 512 as a conservative bound based on the idea that vcpus have a
> few wired pages of memory each for various VMX/SVM things like VMCS/VMCB
> structures.
>
> Given we also wire guest memory on a page fault and only support 1 vcpu
> per guest currently, it's highly unlikely someone is successfully
> running 512 guests. Once we finish fixing the tlb issues forcing us to
> wire or implement SMP, we can revisit this number.
>
> I checked with openbsd.amsterdam and this is well over their current
> densities. (If anyone *IS* somehow running > 512 guests as of this
> moment, please speak up.)
>
> ok?
>
> [1] https://syzkaller.appspot.com/text?tag=ReproC&x=11f507de30
>

ok mlarkin

> Index: sys/arch/amd64/amd64/vmm.c
> ===
> RCS file: /cvs/src/sys/arch/amd64/amd64/vmm.c,v
> retrieving revision 1.292
> diff -u -p -r1.292 vmm.c
> --- sys/arch/amd64/amd64/vmm.c5 Sep 2021 16:36:34 -   1.292
> +++ sys/arch/amd64/amd64/vmm.c11 Sep 2021 17:36:28 -
> @@ -99,6 +99,9 @@ struct vmm_softc {
>
>   int mode;
>
> + size_t  vcpu_ct;
> + size_t  vcpu_max;
> +
>   struct rwlock   vm_lock;
>   size_t  vm_ct;  /* number of in-memory VMs */
>   size_t  vm_idx; /* next unique VM index */
> @@ -368,6 +371,8 @@ vmm_attach(struct device *parent, struct
>   sc->nr_svm_cpus = 0;
>   sc->nr_rvi_cpus = 0;
>   sc->nr_ept_cpus = 0;
> + sc->vcpu_ct = 0;
> + sc->vcpu_max = VMM_MAX_VCPUS;
>   sc->vm_ct = 0;
>   sc->vm_idx = 0;
>
> @@ -1498,6 +1503,15 @@ vm_create(struct vm_create_params *vcp,
>   if (vcp->vcp_ncpus != 1)
>   return (EINVAL);
>
> + rw_enter_write(&vmm_softc->vm_lock);
> + if (vmm_softc->vcpu_ct + vcp->vcp_ncpus > vmm_softc->vcpu_max) {
> + printf("%s: maximum vcpus (%lu) reached\n", __func__,
> + vmm_softc->vcpu_max);
> + rw_exit_write(&vmm_softc->vm_lock);
> + return (ENOMEM);
> + }
> + vmm_softc->vcpu_ct += vcp->vcp_ncpus;
> +
>   vm = pool_get(&vm_pool, PR_WAITOK | PR_ZERO);
>   SLIST_INIT(&vm->vm_vcpu_list);
>   rw_init(&vm->vm_vcpu_lock, "vcpu_list");
> @@ -1509,8 +1523,6 @@ vm_create(struct vm_create_params *vcp,
>   vm->vm_memory_size = memsize;
>   strncpy(vm->vm_name, vcp->vcp_name, VMM_MAX_NAME_LEN - 1);
>
> - rw_enter_write(&vmm_softc->vm_lock);
> -
>   if (vm_impl_init(vm, p)) {
>   printf("failed to init arch-specific features for vm %p\n", vm);
>   vm_teardown(vm);
> @@ -3784,6 +3796,7 @@ vm_teardown(struct vm *vm)
>   SLIST_REMOVE(&vm->vm_vcpu_list, vcpu, vcpu, vc_vcpu_link);
>   vcpu_deinit(vcpu);
>   pool_put(&vcpu_pool, vcpu);
> + vmm_softc->vcpu_ct--;
>   }
>
>   vm_impl_deinit(vm);
> Index: sys/arch/amd64/include/vmmvar.h
> ===
> RCS file: /cvs/src/sys/arch/amd64/include/vmmvar.h,v
> retrieving revision 1.73
> diff -u -p -r1.73 vmmvar.h
> --- sys/arch/amd64/include/vmmvar.h   31 Aug 2021 17:40:59 -  1.73
> +++ sys/arch/amd64/include/vmmvar.h   11 Sep 2021 17:36:28 -
> @@ -29,6 +29,7 @@
>  #define VMM_MAX_PATH_CDROM   128
>  #define VMM_MAX_NAME_LEN 64
>  #define VMM_MAX_KERNEL_PATH  128
> +#define VMM_MAX_VCPUS512
>  #define VMM_MAX_VCPUS_PER_VM 64
>  #define VMM_MAX_VM_MEM_SIZE  32768
>  #define VMM_MAX_NICS_PER_VM  4
>



vmm(4): set global vcpu limit to 512

2021-09-11 Thread Dave Voutila
Syzbot recently discovered that since we don't have any bounds in place
for number of vms or vcpus it's possible to completely exhaust kernel
memory or at least put the system in a state where malloc(9) or
km_alloc(9) fail in systems (e.g. DRM, unveil, etc.) resulting in
panics. Actually, it first discovered some lock ordering issues, but
once those were fixed this issue surfaced via the reproducer [1].

I chose 512 as a conservative bound based on the idea that vcpus have a
few wired pages of memory each for various VMX/SVM things like VMCS/VMCB
structures.

Given we also wire guest memory on a page fault and only support 1 vcpu
per guest currently, it's highly unlikely someone is successfully
running 512 guests. Once we finish fixing the tlb issues forcing us to
wire or implement SMP, we can revisit this number.

I checked with openbsd.amsterdam and this is well over their current
densities. (If anyone *IS* somehow running > 512 guests as of this
moment, please speak up.)

ok?

[1] https://syzkaller.appspot.com/text?tag=ReproC&x=11f507de30

Index: sys/arch/amd64/amd64/vmm.c
===
RCS file: /cvs/src/sys/arch/amd64/amd64/vmm.c,v
retrieving revision 1.292
diff -u -p -r1.292 vmm.c
--- sys/arch/amd64/amd64/vmm.c  5 Sep 2021 16:36:34 -   1.292
+++ sys/arch/amd64/amd64/vmm.c  11 Sep 2021 17:36:28 -
@@ -99,6 +99,9 @@ struct vmm_softc {

int mode;

+   size_t  vcpu_ct;
+   size_t  vcpu_max;
+
struct rwlock   vm_lock;
size_t  vm_ct;  /* number of in-memory VMs */
size_t  vm_idx; /* next unique VM index */
@@ -368,6 +371,8 @@ vmm_attach(struct device *parent, struct
sc->nr_svm_cpus = 0;
sc->nr_rvi_cpus = 0;
sc->nr_ept_cpus = 0;
+   sc->vcpu_ct = 0;
+   sc->vcpu_max = VMM_MAX_VCPUS;
sc->vm_ct = 0;
sc->vm_idx = 0;

@@ -1498,6 +1503,15 @@ vm_create(struct vm_create_params *vcp,
if (vcp->vcp_ncpus != 1)
return (EINVAL);

+   rw_enter_write(&vmm_softc->vm_lock);
+   if (vmm_softc->vcpu_ct + vcp->vcp_ncpus > vmm_softc->vcpu_max) {
+   printf("%s: maximum vcpus (%lu) reached\n", __func__,
+   vmm_softc->vcpu_max);
+   rw_exit_write(&vmm_softc->vm_lock);
+   return (ENOMEM);
+   }
+   vmm_softc->vcpu_ct += vcp->vcp_ncpus;
+
vm = pool_get(&vm_pool, PR_WAITOK | PR_ZERO);
SLIST_INIT(&vm->vm_vcpu_list);
rw_init(&vm->vm_vcpu_lock, "vcpu_list");
@@ -1509,8 +1523,6 @@ vm_create(struct vm_create_params *vcp,
vm->vm_memory_size = memsize;
strncpy(vm->vm_name, vcp->vcp_name, VMM_MAX_NAME_LEN - 1);

-   rw_enter_write(&vmm_softc->vm_lock);
-
if (vm_impl_init(vm, p)) {
printf("failed to init arch-specific features for vm %p\n", vm);
vm_teardown(vm);
@@ -3784,6 +3796,7 @@ vm_teardown(struct vm *vm)
SLIST_REMOVE(&vm->vm_vcpu_list, vcpu, vcpu, vc_vcpu_link);
vcpu_deinit(vcpu);
pool_put(&vcpu_pool, vcpu);
+   vmm_softc->vcpu_ct--;
}

vm_impl_deinit(vm);
Index: sys/arch/amd64/include/vmmvar.h
===
RCS file: /cvs/src/sys/arch/amd64/include/vmmvar.h,v
retrieving revision 1.73
diff -u -p -r1.73 vmmvar.h
--- sys/arch/amd64/include/vmmvar.h 31 Aug 2021 17:40:59 -  1.73
+++ sys/arch/amd64/include/vmmvar.h 11 Sep 2021 17:36:28 -
@@ -29,6 +29,7 @@
 #define VMM_MAX_PATH_CDROM 128
 #define VMM_MAX_NAME_LEN   64
 #define VMM_MAX_KERNEL_PATH128
+#define VMM_MAX_VCPUS  512
 #define VMM_MAX_VCPUS_PER_VM   64
 #define VMM_MAX_VM_MEM_SIZE32768
 #define VMM_MAX_NICS_PER_VM4