Re: vmm(4): improve {rd,wr}msr exit handling for both amd & intel

2021-04-05 Thread Bryan Steele
On Mon, Apr 05, 2021 at 09:54:14AM -0400, Dave Voutila wrote:
> 
> Dave Voutila writes:
> 
> > The following diff cleans up and improves MSR-related event handling in
> > vmm(4) for when the guest attempts a rdmsr/wrmsr instruction. As
> > mentioned in a previous email to tech@ about fixing support for 9front
> > guests [1], we found some discrepancies between vmm(4)'s handling on
> > Intel hosts and AMD hosts.
> >
> > While the diff has been tested already by abieber@ and brynet@ with
> > additional review by mlarkin@, I'm looking for additional testers
> > willing to look for regressions.
> 
> Last call for additional testers. Plan is to commit the diff later today
> as I have an OK from mlarkin@.

This is ok brynet@ too!

> >
> > This diff specifically improves and standardizes MSR-based exit handling
> > between Intel and AMD hosts to the following:
> >
> > 1. All RDMSR instructions that cause a vm-exit must be explicitly
> >    handled (e.g. via emulation) or they result in injecting a #GP
> >    exception into the guest.
> >
> > 2. All WRMSR instructions that cause a vm-exit and are not explicitly
> >    handled are ignored (i.e. %rax and %rdx values are not inspected or
> >    used).
> >
> > As a consequence of change (1) above, the diff adds explicit
> > handling for the following MSRs:
> >
> > 1. MSR_CR_PAT: for now reads/writes are shadowed for the guest vcpu and
> >    host state is not touched. The shadow state is initialized on vcpu
> >    reset to the same value as the host.
> >
> > 2. MSR_BIOS_SIGN, MSR_INT_PEN_MSG (on AMD), and MSR_PLATFORM_ID are all
> >    ignored. This means reads result in vmm(4) setting the guest vcpu's
> >    %rax and %rdx to 0. (These MSRs are ignored in the same manner by
> >    other hypervisors like kvm and nvmm.)
> >
> > The average user should not see a change in behavior of vmm(4) or
> > vmd(8). The biggest change is for *Intel* users as this diff changes the
> > current vmx logic which was not injecting #GP for unsupported
> > MSRs. (Your guests were potentially getting garbage results from rdmsr
> > instructions.)
> >
> 
> If you do test the diff and run into issues, I forgot to mention: please
> build the kernel with VMM_DEBUG. The output in the kernel buffer will
> help diagnose any problematic MSR access.
> 
> > The folks attempting to host the latest release of 9front as a guest on
> > AMD hosts should see their guest boot successfully with this diff :-)
> >
> > -dv
> >
> > [1] https://marc.info/?l=openbsd-tech&m=161693517121814&w=2
> >
> >
> > Index: sys/arch/amd64/include/vmmvar.h
> > ===
> > RCS file: /cvs/src/sys/arch/amd64/include/vmmvar.h,v
> > retrieving revision 1.70
> > diff -u -p -r1.70 vmmvar.h
> > --- sys/arch/amd64/include/vmmvar.h 8 Apr 2020 07:39:48 -   1.70
> > +++ sys/arch/amd64/include/vmmvar.h 31 Mar 2021 00:15:43 -
> > @@ -936,6 +936,9 @@ struct vcpu {
> > paddr_t vc_pvclock_system_gpa;
> > uint32_t vc_pvclock_system_tsc_mul;
> >
> > +   /* Shadowed MSRs */
> > +   uint64_t vc_shadow_pat;
> > +
> > /* VMX only */
> > uint64_t vc_vmx_basic;
> > uint64_t vc_vmx_entry_ctls;
> > Index: sys/arch/amd64/amd64/vmm.c
> > ===
> > RCS file: /cvs/src/sys/arch/amd64/amd64/vmm.c,v
> > retrieving revision 1.278
> > diff -u -p -r1.278 vmm.c
> > --- sys/arch/amd64/amd64/vmm.c  11 Mar 2021 11:16:55 -  1.278
> > +++ sys/arch/amd64/amd64/vmm.c  31 Mar 2021 00:15:43 -
> > @@ -207,6 +207,7 @@ void svm_set_dirty(struct vcpu *, uint32
> >  int vmm_gpa_is_valid(struct vcpu *vcpu, paddr_t gpa, size_t obj_size);
> >  void vmm_init_pvclock(struct vcpu *, paddr_t);
> >  int vmm_update_pvclock(struct vcpu *);
> > +int vmm_pat_is_valid(uint64_t);
> >
> >  #ifdef VMM_DEBUG
> >  void dump_vcpu(struct vcpu *);
> > @@ -3193,6 +3194,9 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, s
> > /* xcr0 power on default sets bit 0 (x87 state) */
> > vcpu->vc_gueststate.vg_xcr0 = XCR0_X87 & xsave_mask;
> >
> > +   /* XXX PAT shadow */
> > +   vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT);
> > +
> >  exit:
> > /* Flush the VMCS */
> > if (vmclear(&vcpu->vc_control_pa)) {
> > @@ -3584,6 +3588,10 @@ vcpu_init(struct vcpu *vcpu)
> > vcpu->vc_state = VCPU_STATE_STOPPED;
> > vcpu->vc_vpid = 0;
> > vcpu->vc_pvclock_system_gpa = 0;
> > +
> > +   /* Shadow PAT MSR, starting with host's value. */
> > +   vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT);
> > +
> > if (vmm_softc->mode == VMM_MODE_VMX ||
> > vmm_softc->mode == VMM_MODE_EPT)
> > ret = vcpu_init_vmx(vcpu);
> > @@ -6257,26 +6265,24 @@ vmx_handle_rdmsr(struct vcpu *vcpu)
> > rdx = &vcpu->vc_gueststate.vg_rdx;
> >
> > switch (*rcx) {
> > -   case MSR_SMBASE:
> > -   /*
> > -* 34.15.6.3 - Saving Guest State (SMM)
> > -*
> > -* Unsupported, so inject #GP and return without
> > - 

Re: vmm(4): improve {rd,wr}msr exit handling for both amd & intel

2021-04-05 Thread Aaron Bieber


Dave Voutila writes:

> Dave Voutila writes:
>
>> The following diff cleans up and improves MSR-related event handling in
>> vmm(4) for when the guest attempts a rdmsr/wrmsr instruction. As
>> mentioned in a previous email to tech@ about fixing support for 9front
>> guests [1], we found some discrepancies between vmm(4)'s handling on
>> Intel hosts and AMD hosts.
>>
>> While the diff has been tested already by abieber@ and brynet@ with
>> additional review by mlarkin@, I'm looking for additional testers
>> willing to look for regressions.

Confirmed it's working for me - the very latest 9front and NixOS work
fine on my Ryzen!

>
> Last call for additional testers. Plan is to commit the diff later today
> as I have an OK from mlarkin@.
>
>>
>> This diff specifically improves and standardizes MSR-based exit handling
>> between Intel and AMD hosts to the following:
>>
>> 1. All RDMSR instructions that cause a vm-exit must be explicitly
>>    handled (e.g. via emulation) or they result in injecting a #GP
>>    exception into the guest.
>>
>> 2. All WRMSR instructions that cause a vm-exit and are not explicitly
>>    handled are ignored (i.e. %rax and %rdx values are not inspected or
>>    used).
>>
>> As a consequence of change (1) above, the diff adds explicit
>> handling for the following MSRs:
>>
>> 1. MSR_CR_PAT: for now reads/writes are shadowed for the guest vcpu and
>>    host state is not touched. The shadow state is initialized on vcpu
>>    reset to the same value as the host.
>>
>> 2. MSR_BIOS_SIGN, MSR_INT_PEN_MSG (on AMD), and MSR_PLATFORM_ID are all
>>    ignored. This means reads result in vmm(4) setting the guest vcpu's
>>    %rax and %rdx to 0. (These MSRs are ignored in the same manner by
>>    other hypervisors like kvm and nvmm.)
>>
>> The average user should not see a change in behavior of vmm(4) or
>> vmd(8). The biggest change is for *Intel* users as this diff changes the
>> current vmx logic which was not injecting #GP for unsupported
>> MSRs. (Your guests were potentially getting garbage results from rdmsr
>> instructions.)
>>
>
> If you do test the diff and run into issues, I forgot to mention: please
> build the kernel with VMM_DEBUG. The output in the kernel buffer will
> help diagnose any problematic MSR access.
>
>> The folks attempting to host the latest release of 9front as a guest on
>> AMD hosts should see their guest boot successfully with this diff :-)
>>
>> -dv
>>
>> [1] https://marc.info/?l=openbsd-tech&m=161693517121814&w=2
>>
>>
>> Index: sys/arch/amd64/include/vmmvar.h
>> ===
>> RCS file: /cvs/src/sys/arch/amd64/include/vmmvar.h,v
>> retrieving revision 1.70
>> diff -u -p -r1.70 vmmvar.h
>> --- sys/arch/amd64/include/vmmvar.h  8 Apr 2020 07:39:48 -   1.70
>> +++ sys/arch/amd64/include/vmmvar.h  31 Mar 2021 00:15:43 -
>> @@ -936,6 +936,9 @@ struct vcpu {
>>  paddr_t vc_pvclock_system_gpa;
>>  uint32_t vc_pvclock_system_tsc_mul;
>>
>> +/* Shadowed MSRs */
>> +uint64_t vc_shadow_pat;
>> +
>>  /* VMX only */
>>  uint64_t vc_vmx_basic;
>>  uint64_t vc_vmx_entry_ctls;
>> Index: sys/arch/amd64/amd64/vmm.c
>> ===
>> RCS file: /cvs/src/sys/arch/amd64/amd64/vmm.c,v
>> retrieving revision 1.278
>> diff -u -p -r1.278 vmm.c
>> --- sys/arch/amd64/amd64/vmm.c   11 Mar 2021 11:16:55 -  1.278
>> +++ sys/arch/amd64/amd64/vmm.c   31 Mar 2021 00:15:43 -
>> @@ -207,6 +207,7 @@ void svm_set_dirty(struct vcpu *, uint32
>>  int vmm_gpa_is_valid(struct vcpu *vcpu, paddr_t gpa, size_t obj_size);
>>  void vmm_init_pvclock(struct vcpu *, paddr_t);
>>  int vmm_update_pvclock(struct vcpu *);
>> +int vmm_pat_is_valid(uint64_t);
>>
>>  #ifdef VMM_DEBUG
>>  void dump_vcpu(struct vcpu *);
>> @@ -3193,6 +3194,9 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, s
>>  /* xcr0 power on default sets bit 0 (x87 state) */
>>  vcpu->vc_gueststate.vg_xcr0 = XCR0_X87 & xsave_mask;
>>
>> +/* XXX PAT shadow */
>> +vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT);
>> +
>>  exit:
>>  /* Flush the VMCS */
>> if (vmclear(&vcpu->vc_control_pa)) {
>> @@ -3584,6 +3588,10 @@ vcpu_init(struct vcpu *vcpu)
>>  vcpu->vc_state = VCPU_STATE_STOPPED;
>>  vcpu->vc_vpid = 0;
>>  vcpu->vc_pvclock_system_gpa = 0;
>> +
>> +/* Shadow PAT MSR, starting with host's value. */
>> +vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT);
>> +
>>  if (vmm_softc->mode == VMM_MODE_VMX ||
>>  vmm_softc->mode == VMM_MODE_EPT)
>>  ret = vcpu_init_vmx(vcpu);
>> @@ -6257,26 +6265,24 @@ vmx_handle_rdmsr(struct vcpu *vcpu)
>> rdx = &vcpu->vc_gueststate.vg_rdx;
>>
>>  switch (*rcx) {
>> -case MSR_SMBASE:
>> -/*
>> - * 34.15.6.3 - Saving Guest State (SMM)
>> - *
>> - * Unsupported, so inject #GP and return without
>> - * advancing %rip.
>> - */
>> +case 

Re: vmm(4): improve {rd,wr}msr exit handling for both amd & intel

2021-04-05 Thread Dave Voutila


Dave Voutila writes:

> The following diff cleans up and improves MSR-related event handling in
> vmm(4) for when the guest attempts a rdmsr/wrmsr instruction. As
> mentioned in a previous email to tech@ about fixing support for 9front
> guests [1], we found some discrepancies between vmm(4)'s handling on
> Intel hosts and AMD hosts.
>
> While the diff has been tested already by abieber@ and brynet@ with
> additional review by mlarkin@, I'm looking for additional testers
> willing to look for regressions.

Last call for additional testers. Plan is to commit the diff later today
as I have an OK from mlarkin@.

>
> This diff specifically improves and standardizes MSR-based exit handling
> between Intel and AMD hosts to the following:
>
> 1. All RDMSR instructions that cause a vm-exit must be explicitly
>    handled (e.g. via emulation) or they result in injecting a #GP
>    exception into the guest.
>
> 2. All WRMSR instructions that cause a vm-exit and are not explicitly
>    handled are ignored (i.e. %rax and %rdx values are not inspected or
>    used).
>
> As a consequence of change (1) above, the diff adds explicit
> handling for the following MSRs:
>
> 1. MSR_CR_PAT: for now reads/writes are shadowed for the guest vcpu and
>    host state is not touched. The shadow state is initialized on vcpu
>    reset to the same value as the host.
>
> 2. MSR_BIOS_SIGN, MSR_INT_PEN_MSG (on AMD), and MSR_PLATFORM_ID are all
>    ignored. This means reads result in vmm(4) setting the guest vcpu's
>    %rax and %rdx to 0. (These MSRs are ignored in the same manner by
>    other hypervisors like kvm and nvmm.)
>
> The average user should not see a change in behavior of vmm(4) or
> vmd(8). The biggest change is for *Intel* users as this diff changes the
> current vmx logic which was not injecting #GP for unsupported
> MSRs. (Your guests were potentially getting garbage results from rdmsr
> instructions.)
>

If you do test the diff and run into issues, I forgot to mention: please
build the kernel with VMM_DEBUG. The output in the kernel buffer will
help diagnose any problematic MSR access.
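
For reference, the unhandled cases log via DPRINTF, so a guest poking
an unsupported MSR shows up in the kernel buffer roughly like this (the
MSR number here is only illustrative):

	vmx_handle_rdmsr: unsupported rdmsr (msr=0x140), injecting #GP

A minimal way to get such a kernel is a custom config that pulls in
GENERIC.MP and adds the option (a sketch; adjust for your arch and
config name, then build with config(8) and make as usual):

	# /sys/arch/amd64/conf/VMMDEBUG.MP - sketch, not a committed config
	include "arch/amd64/conf/GENERIC.MP"
	option	VMM_DEBUG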

> The folks attempting to host the latest release of 9front as a guest on
> AMD hosts should see their guest boot successfully with this diff :-)
>
> -dv
>
> [1] https://marc.info/?l=openbsd-tech&m=161693517121814&w=2
>
>
> Index: sys/arch/amd64/include/vmmvar.h
> ===
> RCS file: /cvs/src/sys/arch/amd64/include/vmmvar.h,v
> retrieving revision 1.70
> diff -u -p -r1.70 vmmvar.h
> --- sys/arch/amd64/include/vmmvar.h   8 Apr 2020 07:39:48 -   1.70
> +++ sys/arch/amd64/include/vmmvar.h   31 Mar 2021 00:15:43 -
> @@ -936,6 +936,9 @@ struct vcpu {
>   paddr_t vc_pvclock_system_gpa;
>   uint32_t vc_pvclock_system_tsc_mul;
>
> + /* Shadowed MSRs */
> + uint64_t vc_shadow_pat;
> +
>   /* VMX only */
>   uint64_t vc_vmx_basic;
>   uint64_t vc_vmx_entry_ctls;
> Index: sys/arch/amd64/amd64/vmm.c
> ===
> RCS file: /cvs/src/sys/arch/amd64/amd64/vmm.c,v
> retrieving revision 1.278
> diff -u -p -r1.278 vmm.c
> --- sys/arch/amd64/amd64/vmm.c11 Mar 2021 11:16:55 -  1.278
> +++ sys/arch/amd64/amd64/vmm.c31 Mar 2021 00:15:43 -
> @@ -207,6 +207,7 @@ void svm_set_dirty(struct vcpu *, uint32
>  int vmm_gpa_is_valid(struct vcpu *vcpu, paddr_t gpa, size_t obj_size);
>  void vmm_init_pvclock(struct vcpu *, paddr_t);
>  int vmm_update_pvclock(struct vcpu *);
> +int vmm_pat_is_valid(uint64_t);
>
>  #ifdef VMM_DEBUG
>  void dump_vcpu(struct vcpu *);
> @@ -3193,6 +3194,9 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, s
>   /* xcr0 power on default sets bit 0 (x87 state) */
>   vcpu->vc_gueststate.vg_xcr0 = XCR0_X87 & xsave_mask;
>
> + /* XXX PAT shadow */
> + vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT);
> +
>  exit:
>   /* Flush the VMCS */
> if (vmclear(&vcpu->vc_control_pa)) {
> @@ -3584,6 +3588,10 @@ vcpu_init(struct vcpu *vcpu)
>   vcpu->vc_state = VCPU_STATE_STOPPED;
>   vcpu->vc_vpid = 0;
>   vcpu->vc_pvclock_system_gpa = 0;
> +
> + /* Shadow PAT MSR, starting with host's value. */
> + vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT);
> +
>   if (vmm_softc->mode == VMM_MODE_VMX ||
>   vmm_softc->mode == VMM_MODE_EPT)
>   ret = vcpu_init_vmx(vcpu);
> @@ -6257,26 +6265,24 @@ vmx_handle_rdmsr(struct vcpu *vcpu)
> rdx = &vcpu->vc_gueststate.vg_rdx;
>
>   switch (*rcx) {
> - case MSR_SMBASE:
> - /*
> -  * 34.15.6.3 - Saving Guest State (SMM)
> -  *
> -  * Unsupported, so inject #GP and return without
> -  * advancing %rip.
> -  */
> + case MSR_BIOS_SIGN:
> + case MSR_PLATFORM_ID:
> + /* Ignored */
> + *rax = 0;
> + *rdx = 0;
> + break;
> + case MSR_CR_PAT:
> + *rax = 

vmm(4): improve {rd,wr}msr exit handling for both amd & intel

2021-04-02 Thread Dave Voutila
The following diff cleans up and improves MSR-related event handling in
vmm(4) for when the guest attempts a rdmsr/wrmsr instruction. As
mentioned in a previous email to tech@ about fixing support for 9front
guests [1], we found some discrepancies between vmm(4)'s handling on
Intel hosts and AMD hosts.

While the diff has been tested already by abieber@ and brynet@ with
additional review by mlarkin@, I'm looking for additional testers
willing to look for regressions.

This diff specifically improves and standardizes MSR-based exit handling
between Intel and AMD hosts to the following:

1. All RDMSR instructions that cause a vm-exit must be explicitly
   handled (e.g. via emulation) or they result in injecting a #GP
   exception into the guest.

2. All WRMSR instructions that cause a vm-exit and are not explicitly
   handled are ignored (i.e. %rax and %rdx values are not inspected or
   used).
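
To make the combined policy concrete, here is a minimal sketch of the
resulting exit handling, using the MSRs discussed below. The rdmsr half
is condensed from the diff; the wrmsr half is shorthand for the
behavior described above, not the literal code:

	/* rdmsr exit: emulate known MSRs, #GP everything else */
	switch (*rcx) {			/* guest %rcx selects the MSR */
	case MSR_BIOS_SIGN:
	case MSR_PLATFORM_ID:
		*rax = 0;		/* "ignored" MSRs read as zero */
		*rdx = 0;
		break;
	case MSR_CR_PAT:
		*rax = vcpu->vc_shadow_pat & 0xFFFFFFFFULL;	/* low 32 */
		*rdx = vcpu->vc_shadow_pat >> 32;		/* high 32 */
		break;
	default:
		/* unhandled: inject #GP, do not advance %rip */
		return (vmm_inject_gp(vcpu));
	}

	/* wrmsr exit: unhandled writes are dropped, %rip still advances */
	switch (*rcx) {
	case MSR_CR_PAT:
		/* presumably gated on vmm_pat_is_valid(), see below */
		vcpu->vc_shadow_pat = (*rdx << 32) |
		    (*rax & 0xFFFFFFFFULL);	/* host PAT is untouched */
		break;
	default:
		break;			/* %rax/%rdx not inspected or used */
	}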

As a consequence of change (1) above, the diff adds explicit
handling for the following MSRs:

1. MSR_CR_PAT: for now reads/writes are shadowed for the guest vcpu and
   host state is not touched. The shadow state is initialized on vcpu
   reset to the same value as the host.

2. MSR_BIOS_SIGN, MSR_INT_PEN_MSG (on AMD), and MSR_PLATFORM_ID are all
   ignored. This means reads result in vmm(4) setting the guest vcpu's
   %rax and %rdx to 0. (These MSRs are ignored in the same manner by
   other hypervisors like kvm and nvmm.)
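
Since MSR_CR_PAT is emulated against the shadow value, the diff also
declares vmm_pat_is_valid() (the prototype is visible in the vmm.c hunk
below). Going by the architectural PAT encodings (0 UC, 1 WC, 4 WT,
5 WP, 6 WB and 7 UC- are defined; 2 and 3 are reserved), a plausible
shape for that check is the following sketch, not necessarily the
literal implementation:

	int
	vmm_pat_is_valid(uint64_t pat)
	{
		int i;
		uint8_t type;

		/* PAT holds eight 8-bit entries; reject reserved types */
		for (i = 0; i < 8; i++) {
			type = (pat >> (i * 8)) & 0xff;
			if (type == 0x2 || type == 0x3 || type > 0x7)
				return (0);
		}
		return (1);
	}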

The average user should not see a change in behavior of vmm(4) or
vmd(8). The biggest change is for *Intel* users as this diff changes the
current vmx logic which was not injecting #GP for unsupported
MSRs. (Your guests were potentially getting garbage results from rdmsr
instructions.)

The folks attempting to host the latest release of 9front as a guest on
AMD hosts should see their guest boot successfully with this diff :-)

-dv

[1] https://marc.info/?l=openbsd-tech&m=161693517121814&w=2


Index: sys/arch/amd64/include/vmmvar.h
===
RCS file: /cvs/src/sys/arch/amd64/include/vmmvar.h,v
retrieving revision 1.70
diff -u -p -r1.70 vmmvar.h
--- sys/arch/amd64/include/vmmvar.h 8 Apr 2020 07:39:48 -   1.70
+++ sys/arch/amd64/include/vmmvar.h 31 Mar 2021 00:15:43 -
@@ -936,6 +936,9 @@ struct vcpu {
paddr_t vc_pvclock_system_gpa;
uint32_t vc_pvclock_system_tsc_mul;

+   /* Shadowed MSRs */
+   uint64_t vc_shadow_pat;
+
/* VMX only */
uint64_t vc_vmx_basic;
uint64_t vc_vmx_entry_ctls;
Index: sys/arch/amd64/amd64/vmm.c
===
RCS file: /cvs/src/sys/arch/amd64/amd64/vmm.c,v
retrieving revision 1.278
diff -u -p -r1.278 vmm.c
--- sys/arch/amd64/amd64/vmm.c  11 Mar 2021 11:16:55 -  1.278
+++ sys/arch/amd64/amd64/vmm.c  31 Mar 2021 00:15:43 -
@@ -207,6 +207,7 @@ void svm_set_dirty(struct vcpu *, uint32
 int vmm_gpa_is_valid(struct vcpu *vcpu, paddr_t gpa, size_t obj_size);
 void vmm_init_pvclock(struct vcpu *, paddr_t);
 int vmm_update_pvclock(struct vcpu *);
+int vmm_pat_is_valid(uint64_t);

 #ifdef VMM_DEBUG
 void dump_vcpu(struct vcpu *);
@@ -3193,6 +3194,9 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, s
/* xcr0 power on default sets bit 0 (x87 state) */
vcpu->vc_gueststate.vg_xcr0 = XCR0_X87 & xsave_mask;

+   /* XXX PAT shadow */
+   vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT);
+
 exit:
/* Flush the VMCS */
if (vmclear(&vcpu->vc_control_pa)) {
@@ -3584,6 +3588,10 @@ vcpu_init(struct vcpu *vcpu)
vcpu->vc_state = VCPU_STATE_STOPPED;
vcpu->vc_vpid = 0;
vcpu->vc_pvclock_system_gpa = 0;
+
+   /* Shadow PAT MSR, starting with host's value. */
+   vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT);
+
if (vmm_softc->mode == VMM_MODE_VMX ||
vmm_softc->mode == VMM_MODE_EPT)
ret = vcpu_init_vmx(vcpu);
@@ -6257,26 +6265,24 @@ vmx_handle_rdmsr(struct vcpu *vcpu)
rdx = &vcpu->vc_gueststate.vg_rdx;

switch (*rcx) {
-   case MSR_SMBASE:
-   /*
-* 34.15.6.3 - Saving Guest State (SMM)
-*
-* Unsupported, so inject #GP and return without
-* advancing %rip.
-*/
+   case MSR_BIOS_SIGN:
+   case MSR_PLATFORM_ID:
+   /* Ignored */
+   *rax = 0;
+   *rdx = 0;
+   break;
+   case MSR_CR_PAT:
+   *rax = (vcpu->vc_shadow_pat & 0xFFFFFFFFULL);
+   *rdx = (vcpu->vc_shadow_pat >> 32);
+   break;
+   default:
+   /* Unsupported MSRs cause a #GP exception; don't advance %rip */
+   DPRINTF("%s: unsupported rdmsr (msr=0x%llx), injecting #GP\n",
+   __func__, *rcx);
ret = vmm_inject_gp(vcpu);
return (ret);
}

-   *rax = 0;
-   *rdx = 0;
-
-#ifdef VMM_DEBUG
-