Re: [PATCH] vmm: Add MSRs to readregs / writeregs

2017-05-01 Thread Mike Larkin
On Sat, Apr 29, 2017 at 05:24:22PM -0700, Pratik Vyas wrote:
> Hello tech@,
> 
> This is a patch that extends the readregs and writeregs vmm(4) ioctl to
> read and write MSRs as well.
> 
> It also sets the IA32_VMX_IA32E_MODE_GUEST entry control in
> vcpu_reset_regs based on the value of EFER_LMA.
> 
> There are changes to vmmvar.h, and building vmd(8) would require a
> `make includes` step. This should also not alter any behaviour of vmd(8).
> 
> Context: These are the kernel changes required to implement vmctl send
> and vmctl receive. vmctl send / receive are two new options that will
> support snapshotting VMs and migrating VMs from one host to another.
> This project was undertaken at San Jose State University along with my
> three teammates (CCed), with mlarkin@ as our advisor.
> 
> Patches to vmd(8) that implement vmctl send and vmctl receive are
> forthcoming.
> 
> 
> Thanks,
> Pratik
> 

committed, thanks.

> 
> 
> Index: sys/arch/amd64/amd64/vmm.c
> ===================================================================
> RCS file: /home/pdvyas/cvs/src/sys/arch/amd64/amd64/vmm.c,v
> retrieving revision 1.137
> diff -u -p -a -u -r1.137 vmm.c
> --- sys/arch/amd64/amd64/vmm.c  28 Apr 2017 10:09:37 -0000  1.137
> +++ sys/arch/amd64/amd64/vmm.c  30 Apr 2017 00:01:21 -0000
> @@ -1334,7 +1334,9 @@ vcpu_readregs_vmx(struct vcpu *vcpu, uin
>   uint64_t sel, limit, ar;
>   uint64_t *gprs = vrs->vrs_gprs;
>   uint64_t *crs = vrs->vrs_crs;
> + uint64_t *msrs = vrs->vrs_msrs;
>   struct vcpu_segment_info *sregs = vrs->vrs_sregs;
> + struct vmx_msr_store *msr_store;
> 
>   if (vcpu_reload_vmcs_vmx(&vcpu->vc_control_pa))
>   return (EINVAL);
> @@ -1402,6 +1404,14 @@ vcpu_readregs_vmx(struct vcpu *vcpu, uin
>   goto errout;
>   }
> 
> + msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
> +
> + if (regmask & VM_RWREGS_MSRS) {
> + for (i = 0; i < VCPU_REGS_NMSRS; i++) {
> + msrs[i] = msr_store[i].vms_data;
> + }
> + }
> +
>   goto out;
> 
> errout:
> @@ -1448,7 +1458,9 @@ vcpu_writeregs_vmx(struct vcpu *vcpu, ui
>   uint64_t limit, ar;
>   uint64_t *gprs = vrs->vrs_gprs;
>   uint64_t *crs = vrs->vrs_crs;
> + uint64_t *msrs = vrs->vrs_msrs;
>   struct vcpu_segment_info *sregs = vrs->vrs_sregs;
> + struct vmx_msr_store *msr_store;
> 
>   if (loadvmcs) {
>   if (vcpu_reload_vmcs_vmx(&vcpu->vc_control_pa))
> @@ -1518,6 +1530,14 @@ vcpu_writeregs_vmx(struct vcpu *vcpu, ui
>   goto errout;
>   }
> 
> + msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
> +
> + if (regmask & VM_RWREGS_MSRS) {
> + for (i = 0; i < VCPU_REGS_NMSRS; i++) {
> + msr_store[i].vms_data = msrs[i];
> + }
> + }
> +
>   goto out;
> 
> errout:
> @@ -2128,7 +2148,7 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, s
>* IA32_VMX_LOAD_DEBUG_CONTROLS
>* IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY
>*/
> - if (ug == 1)
> + if (ug == 1 && !(vrs->vrs_msrs[VCPU_REGS_EFER] & EFER_LMA))
>   want1 = 0;
>   else
>   want1 = IA32_VMX_IA32E_MODE_GUEST;
> @@ -2277,26 +2297,12 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, s
>*/
>   msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
> 
> - /*
> -  * Make sure LME is enabled in EFER if restricted guest mode is
> -  * needed.
> -  */
> - msr_store[0].vms_index = MSR_EFER;
> - if (ug == 1)
> - msr_store[0].vms_data = 0ULL;   /* Initial value */
> - else
> - msr_store[0].vms_data = EFER_LME;
> -
> - msr_store[1].vms_index = MSR_STAR;
> - msr_store[1].vms_data = 0ULL;   /* Initial value */
> - msr_store[2].vms_index = MSR_LSTAR;
> - msr_store[2].vms_data = 0ULL;   /* Initial value */
> - msr_store[3].vms_index = MSR_CSTAR;
> - msr_store[3].vms_data = 0ULL;   /* Initial value */
> - msr_store[4].vms_index = MSR_SFMASK;
> - msr_store[4].vms_data = 0ULL;   /* Initial value */
> - msr_store[5].vms_index = MSR_KERNELGSBASE;
> - msr_store[5].vms_data = 0ULL;   /* Initial value */
> + msr_store[VCPU_REGS_EFER].vms_index = MSR_EFER;
> + msr_store[VCPU_REGS_STAR].vms_index = MSR_STAR;
> + msr_store[VCPU_REGS_LSTAR].vms_index = MSR_LSTAR;
> + msr_store[VCPU_REGS_CSTAR].vms_index = MSR_CSTAR;
> + msr_store[VCPU_REGS_SFMASK].vms_index = MSR_SFMASK;
> + msr_store[VCPU_REGS_KGSBASE].vms_index = MSR_KERNELGSBASE;
> 
>   /*
>* Currently we have the same count of entry/exit MSRs loads/stores
> @@ -2359,6 +2365,13 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, s
>   ret = vcpu_writeregs_vmx(vcpu, VM_RWREGS_ALL, 0, vrs);
> 
>   /*
> +  * Make sure LME is enabled in EFER if restricted guest mode is
> +  * needed.
> +   

Re: [PATCH] vmm: Add MSRs to readregs / writeregs

2017-05-01 Thread Mike Larkin
On Mon, May 01, 2017 at 10:08:28PM +0200, Mark Kettenis wrote:
> > Date: Mon, 1 May 2017 12:35:32 -0700
> > From: Mike Larkin 
> > 
> > Yes, the list is locked down to those listed in the diff (those are the only
> > ones in the permissions bitmap anyway).
> > 
> > I don't think there are any bits we should not allow touching via
> > this mechanism. The guest can already do that itself (since we
> > whitelist these MSRs in the bitmap). If we find there is a bit, say,
> > in EFER, that we need to lock down, then we need to do that
> > differently (there will be more changes than just this diff). I
> > think this can go in and we can address that potential issue later
> > (but I don't think there is an issue).
> > 
> > One thing that will also need to be addressed (later) is the
> > handling of FPU state, which was recently improved. We can choose to
> > either just flush the state and set CR0_TS on the target after
> > receive, or try to figure out all the state that needs to be sent
> > over, and transfer that. Of course, there's always the possibility
> > that we are sending to a machine with less FPU capability (or more)
> > than the source, and we'll need to improve checking there as well
> > (that probably amounts to transferring %xcr0 and checking that what
> > the "sending side" wants is a subset of what the "receiving side"
> > can accept).
> 
> I fear that you'll have to make sure that vmd on the new machine masks
> off any functionality not offered by the old machine, and refuses a
> transfer if the capability set of the new machine isn't a strict
> superset of the old machine's.
> 

Yes, FPU is just the tip of the iceberg here. More stuff for these guys
to implement :)

> > I'll work with Pratik and the rest of the team to clean that up later.
> > 
> > Other than the reserved bits question, any other thoughts / ok?
> 
> Nope. Go ahead.
> 

Thanks, I'll commit it tonight.

-ml

> > > > > ===================================================================
> > > > > RCS file: /home/pdvyas/cvs/src/sys/arch/amd64/amd64/vmm.c,v
> > > > > retrieving revision 1.137
> > > > > diff -u -p -a -u -r1.137 vmm.c
> > > > > --- sys/arch/amd64/amd64/vmm.c  28 Apr 2017 10:09:37 -0000  1.137
> > > > > +++ sys/arch/amd64/amd64/vmm.c  30 Apr 2017 00:01:21 -0000
> > > > > @@ -1334,7 +1334,9 @@ vcpu_readregs_vmx(struct vcpu *vcpu, uin
> > > > >   uint64_t sel, limit, ar;
> > > > >   uint64_t *gprs = vrs->vrs_gprs;
> > > > >   uint64_t *crs = vrs->vrs_crs;
> > > > > + uint64_t *msrs = vrs->vrs_msrs;
> > > > >   struct vcpu_segment_info *sregs = vrs->vrs_sregs;
> > > > > + struct vmx_msr_store *msr_store;
> > > > > 
> > > > >   if (vcpu_reload_vmcs_vmx(&vcpu->vc_control_pa))
> > > > >   return (EINVAL);
> > > > > @@ -1402,6 +1404,14 @@ vcpu_readregs_vmx(struct vcpu *vcpu, uin
> > > > >   goto errout;
> > > > >   }
> > > > > 
> > > > > + msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
> > > > > +
> > > > > + if (regmask & VM_RWREGS_MSRS) {
> > > > > + for (i = 0; i < VCPU_REGS_NMSRS; i++) {
> > > > > + msrs[i] = msr_store[i].vms_data;
> > > > > + }
> > > > > + }
> > > > > +
> > > > >   goto out;
> > > > > 
> > > > > errout:
> > > > > @@ -1448,7 +1458,9 @@ vcpu_writeregs_vmx(struct vcpu *vcpu, ui
> > > > >   uint64_t limit, ar;
> > > > >   uint64_t *gprs = vrs->vrs_gprs;
> > > > >   uint64_t *crs = vrs->vrs_crs;
> > > > > + uint64_t *msrs = vrs->vrs_msrs;
> > > > >   struct vcpu_segment_info *sregs = vrs->vrs_sregs;
> > > > > + struct vmx_msr_store *msr_store;
> > > > > 
> > > > >   if (loadvmcs) {
> > > > >   if (vcpu_reload_vmcs_vmx(&vcpu->vc_control_pa))
> > > > > @@ -1518,6 +1530,14 @@ vcpu_writeregs_vmx(struct vcpu *vcpu, ui
> > > > >   goto errout;
> > > > >   }
> > > > > 
> > > > > + msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
> > > > > +
> > > > > + if (regmask & VM_RWREGS_MSRS) {
> > > > > + for (i = 0; i < VCPU_REGS_NMSRS; i++) {
> > > > > + msr_store[i].vms_data = msrs[i];
> > > > > + }
> > > > > + }
> > > > > +
> > > > >   goto out;
> > > > > 
> > > > > errout:
> > > > > @@ -2128,7 +2148,7 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, s
> > > > >* IA32_VMX_LOAD_DEBUG_CONTROLS
> > > > >* IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY
> > > > >*/
> > > > > - if (ug == 1)
> > > > > + if (ug == 1 && !(vrs->vrs_msrs[VCPU_REGS_EFER] & EFER_LMA))
> > > > >   want1 = 0;
> > > > >   else
> > > > >   want1 = IA32_VMX_IA32E_MODE_GUEST;
> > > > > @@ -2277,26 +2297,12 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, s
> > > > >*/
> > > > >   msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
> > 

Re: [PATCH] vmm: Add MSRs to readregs / writeregs

2017-05-01 Thread Mark Kettenis
> Date: Mon, 1 May 2017 12:35:32 -0700
> From: Mike Larkin 
> 
> Yes, the list is locked down to those listed in the diff (those are the only 
> ones in the permissions bitmap anyway).
> 
> I don't think there are any bits we should not allow touching via
> this mechanism. The guest can already do that itself (since we
> whitelist these MSRs in the bitmap). If we find there is a bit, say,
> in EFER, that we need to lock down, then we need to do that
> differently (there will be more changes than just this diff). I
> think this can go in and we can address that potential issue later
> (but I don't think there is an issue).
> 
> One thing that will also need to be addressed (later) is the
> handling of FPU state, which was recently improved. We can choose to
> either just flush the state and set CR0_TS on the target after
> receive, or try to figure out all the state that needs to be sent
> over, and transfer that. Of course, there's always the possibility
> that we are sending to a machine with less FPU capability (or more)
> than the source, and we'll need to improve checking there as well
> (that probably amounts to transferring %xcr0 and checking that what
> the "sending side" wants is a subset of what the "receiving side"
> can accept).

I fear that you'll have to make sure that vmd on the new machine masks
off any functionality not offered by the old machine, and refuses a
transfer if the capability set of the new machine isn't a strict
superset of the old machine's.

> I'll work with Pratik and the rest of the team to clean that up later.
> 
> Other than the reserved bits question, any other thoughts / ok?

Nope. Go ahead.

> > > > ===================================================================
> > > > RCS file: /home/pdvyas/cvs/src/sys/arch/amd64/amd64/vmm.c,v
> > > > retrieving revision 1.137
> > > > diff -u -p -a -u -r1.137 vmm.c
> > > > --- sys/arch/amd64/amd64/vmm.c  28 Apr 2017 10:09:37 -0000  1.137
> > > > +++ sys/arch/amd64/amd64/vmm.c  30 Apr 2017 00:01:21 -0000
> > > > @@ -1334,7 +1334,9 @@ vcpu_readregs_vmx(struct vcpu *vcpu, uin
> > > > uint64_t sel, limit, ar;
> > > > uint64_t *gprs = vrs->vrs_gprs;
> > > > uint64_t *crs = vrs->vrs_crs;
> > > > +   uint64_t *msrs = vrs->vrs_msrs;
> > > > struct vcpu_segment_info *sregs = vrs->vrs_sregs;
> > > > +   struct vmx_msr_store *msr_store;
> > > > 
> > > > if (vcpu_reload_vmcs_vmx(&vcpu->vc_control_pa))
> > > > return (EINVAL);
> > > > @@ -1402,6 +1404,14 @@ vcpu_readregs_vmx(struct vcpu *vcpu, uin
> > > > goto errout;
> > > > }
> > > > 
> > > > +   msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
> > > > +
> > > > +   if (regmask & VM_RWREGS_MSRS) {
> > > > +   for (i = 0; i < VCPU_REGS_NMSRS; i++) {
> > > > +   msrs[i] = msr_store[i].vms_data;
> > > > +   }
> > > > +   }
> > > > +
> > > > goto out;
> > > > 
> > > > errout:
> > > > @@ -1448,7 +1458,9 @@ vcpu_writeregs_vmx(struct vcpu *vcpu, ui
> > > > uint64_t limit, ar;
> > > > uint64_t *gprs = vrs->vrs_gprs;
> > > > uint64_t *crs = vrs->vrs_crs;
> > > > +   uint64_t *msrs = vrs->vrs_msrs;
> > > > struct vcpu_segment_info *sregs = vrs->vrs_sregs;
> > > > +   struct vmx_msr_store *msr_store;
> > > > 
> > > > if (loadvmcs) {
> > > > if (vcpu_reload_vmcs_vmx(&vcpu->vc_control_pa))
> > > > @@ -1518,6 +1530,14 @@ vcpu_writeregs_vmx(struct vcpu *vcpu, ui
> > > > goto errout;
> > > > }
> > > > 
> > > > +   msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
> > > > +
> > > > +   if (regmask & VM_RWREGS_MSRS) {
> > > > +   for (i = 0; i < VCPU_REGS_NMSRS; i++) {
> > > > +   msr_store[i].vms_data = msrs[i];
> > > > +   }
> > > > +   }
> > > > +
> > > > goto out;
> > > > 
> > > > errout:
> > > > @@ -2128,7 +2148,7 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, s
> > > >  * IA32_VMX_LOAD_DEBUG_CONTROLS
> > > >  * IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY
> > > >  */
> > > > -   if (ug == 1)
> > > > +   if (ug == 1 && !(vrs->vrs_msrs[VCPU_REGS_EFER] & EFER_LMA))
> > > > want1 = 0;
> > > > else
> > > > want1 = IA32_VMX_IA32E_MODE_GUEST;
> > > > @@ -2277,26 +2297,12 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, s
> > > >  */
> > > > msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
> > > > 
> > > > -   /*
> > > > -* Make sure LME is enabled in EFER if restricted guest mode is
> > > > -* needed.
> > > > -*/
> > > > -   msr_store[0].vms_index = MSR_EFER;
> > > > -   if (ug == 1)
> > > > -   msr_store[0].vms_data = 0ULL;   /* Initial value */
> > > > -   else
> > > > -  

Re: [PATCH] vmm: Add MSRs to readregs / writeregs

2017-05-01 Thread Mike Larkin
On Mon, May 01, 2017 at 08:30:42PM +0200, Mark Kettenis wrote:
> > Date: Mon, 1 May 2017 11:16:07 -0700
> > From: Mike Larkin 
> > 
> > On Sat, Apr 29, 2017 at 05:24:22PM -0700, Pratik Vyas wrote:
> > > Hello tech@,
> > > 
> > > This is a patch that extends the readregs and writeregs vmm(4) ioctl to
> > > read and write MSRs as well.
> > > 
> > > It also sets the IA32_VMX_IA32E_MODE_GUEST entry control in
> > > vcpu_reset_regs based on the value of EFER_LMA.
> > > 
> > > There are changes to vmmvar.h, and building vmd(8) would require a
> > > `make includes` step. This should also not alter any behaviour of vmd(8).
> > > 
> > > Context: These are the kernel changes required to implement vmctl send
> > > and vmctl receive. vmctl send / receive are two new options that will
> > > support snapshotting VMs and migrating VMs from one host to another.
> > > This project was undertaken at San Jose State University along with my
> > > three teammates (CCed), with mlarkin@ as our advisor.
> > > 
> > > Patches to vmd(8) that implement vmctl send and vmctl receive are
> > > forthcoming.
> > > 
> > > 
> > > Thanks,
> > > Pratik
> > > 
> > > 
> > 
> > For some more color commentary - this diff is needed to provide vmd a way to
> > access the MSRs via the register set it sends to vmm for VM initialization.
> > Without that, the VM resumes with default/power-on state, and that won't work
> > to restore a VM that had already been running.
> > 
> > I'll do some quick testing myself (I know this team has already tested OpenBSD
> > and Linux guests with this code, for both bios and non-bios boots) and if I
> > don't see any problems, I'll commit this tonight. Unless someone objects?
> 
> It only allows reading/writing selected MSRs, doesn't it?
> 
> Are there any bits in those MSRs that vmd shouldn't touch?
> 

Yes, the list is locked down to those listed in the diff (those are the only 
ones in the permissions bitmap anyway).

I don't think there are any bits we should not allow touching via this
mechanism. The guest can already do that itself (since we whitelist these MSRs
in the bitmap). If we find there is a bit, say, in EFER, that we need to lock
down, then we need to do that differently (there will be more changes than
just this diff). I think this can go in and we can address that potential issue
later (but I don't think there is an issue).
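
(Aside, for readers unfamiliar with the mechanism: below is a minimal
sketch of how such a VMX MSR permission bitmap whitelist works, following
the bitmap layout in the Intel SDM. It is an illustration with a made-up
function name, not the actual vmm.c helper.)

#include <stdint.h>

/*
 * Illustrative sketch (not vmm.c code), per the Intel SDM layout:
 * bytes 0-1023 of the 4KB bitmap govern reads of MSRs 0x0-0x1fff,
 * bytes 1024-2047 govern reads of 0xc0000000-0xc0001fff, and the
 * second 2KB governs writes in the same order.  A set bit forces a
 * VM exit; clearing both bits whitelists the MSR for the guest.
 */
static void
msr_bitmap_allow(uint8_t *bitmap, uint32_t msr)
{
	uint32_t bit;

	if (msr <= 0x1fff) {
		bit = msr;
		bitmap[bit / 8] &= ~(1 << (bit % 8));		/* read */
		bitmap[2048 + bit / 8] &= ~(1 << (bit % 8));	/* write */
	} else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
		bit = msr - 0xc0000000;
		bitmap[1024 + bit / 8] &= ~(1 << (bit % 8));	/* read */
		bitmap[3072 + bit / 8] &= ~(1 << (bit % 8));	/* write */
	}
	/* MSRs outside these two ranges always cause a VM exit */
}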

One thing that will also need to be addressed (later) is the handling of FPU
state, which was recently improved. We can choose to either just flush
the state and set CR0_TS on the target after receive, or try to figure out
all the state that needs to be sent over, and transfer that. Of course, there's
always the possibility that we are sending to a machine with less FPU
capability (or more) than the source, and we'll need to improve checking there
as well (that probably amounts to transferring %xcr0 and checking that what the
"sending side" wants is a subset of what the "receiving side" can accept).

I'll work with Pratik and the rest of the team to clean that up later.

Other than the reserved bits question, any other thoughts / ok?

-ml



> > > ===================================================================
> > > RCS file: /home/pdvyas/cvs/src/sys/arch/amd64/amd64/vmm.c,v
> > > retrieving revision 1.137
> > > diff -u -p -a -u -r1.137 vmm.c
> > > --- sys/arch/amd64/amd64/vmm.c  28 Apr 2017 10:09:37 -0000  1.137
> > > +++ sys/arch/amd64/amd64/vmm.c  30 Apr 2017 00:01:21 -0000
> > > @@ -1334,7 +1334,9 @@ vcpu_readregs_vmx(struct vcpu *vcpu, uin
> > >   uint64_t sel, limit, ar;
> > >   uint64_t *gprs = vrs->vrs_gprs;
> > >   uint64_t *crs = vrs->vrs_crs;
> > > + uint64_t *msrs = vrs->vrs_msrs;
> > >   struct vcpu_segment_info *sregs = vrs->vrs_sregs;
> > > + struct vmx_msr_store *msr_store;
> > > 
> > >   if (vcpu_reload_vmcs_vmx(&vcpu->vc_control_pa))
> > >   return (EINVAL);
> > > @@ -1402,6 +1404,14 @@ vcpu_readregs_vmx(struct vcpu *vcpu, uin
> > >   goto errout;
> > >   }
> > > 
> > > + msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
> > > +
> > > + if (regmask & VM_RWREGS_MSRS) {
> > > + for (i = 0; i < VCPU_REGS_NMSRS; i++) {
> > > + msrs[i] = msr_store[i].vms_data;
> > > + }
> > > + }
> > > +
> > >   goto out;
> > > 
> > > errout:
> > > @@ -1448,7 +1458,9 @@ vcpu_writeregs_vmx(struct vcpu *vcpu, ui
> > >   uint64_t limit, ar;
> > >   uint64_t *gprs = vrs->vrs_gprs;
> > >   uint64_t *crs = vrs->vrs_crs;
> > > + uint64_t *msrs = vrs->vrs_msrs;
> > >   struct vcpu_segment_info *sregs = vrs->vrs_sregs;
> > > + struct vmx_msr_store *msr_store;
> > > 
> > >   if (loadvmcs) {
> > >   if (vcpu_reload_vmcs_vmx(&vcpu->vc_control_pa))
> > > @@ -1518,6 +1530,14 @@ vcpu_writeregs_vmx(struct vcpu *vcpu, ui
> > >   goto errout;
> > >   }
> > > 
> > > + msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
> > > +
> > > + if (regmask & VM_RWREGS_MSRS) {
> 

Re: [PATCH] vmm: Add MSRs to readregs / writeregs

2017-05-01 Thread Mark Kettenis
> Date: Mon, 1 May 2017 11:16:07 -0700
> From: Mike Larkin 
> 
> On Sat, Apr 29, 2017 at 05:24:22PM -0700, Pratik Vyas wrote:
> > Hello tech@,
> > 
> > This is a patch that extends the readregs and writeregs vmm(4) ioctl to
> > read and write MSRs as well.
> > 
> > It also sets the IA32_VMX_IA32E_MODE_GUEST entry control in
> > vcpu_reset_regs based on the value of EFER_LMA.
> > 
> > There are changes to vmmvar.h, and building vmd(8) would require a
> > `make includes` step. This should also not alter any behaviour of vmd(8).
> > 
> > Context: These are the kernel changes required to implement vmctl send
> > and vmctl receive. vmctl send / receive are two new options that will
> > support snapshotting VMs and migrating VMs from one host to another.
> > This project was undertaken at San Jose State University along with my
> > three teammates (CCed), with mlarkin@ as our advisor.
> > 
> > Patches to vmd(8) that implement vmctl send and vmctl receive are
> > forthcoming.
> > 
> > 
> > Thanks,
> > Pratik
> > 
> > 
> 
> For some more color commentary - this diff is needed to provide vmd a way to
> access the MSRs via the register set it sends to vmm for VM initialization.
> Without that, the VM resumes with default/power-on state, and that won't work
> to restore a VM that had already been running.
> 
> I'll do some quick testing myself (I know this team has already tested OpenBSD
> and Linux guests with this code, for both bios and non-bios boots) and if I
> don't see any problems, I'll commit this tonight. Unless someone objects?

It only allows reading/writing selected MSRs, doesn't it?

Are there any bits in those MSRs that vmd shouldn't touch?

> > Index: sys/arch/amd64/amd64/vmm.c
> > ===================================================================
> > RCS file: /home/pdvyas/cvs/src/sys/arch/amd64/amd64/vmm.c,v
> > retrieving revision 1.137
> > diff -u -p -a -u -r1.137 vmm.c
> > --- sys/arch/amd64/amd64/vmm.c  28 Apr 2017 10:09:37 -0000  1.137
> > +++ sys/arch/amd64/amd64/vmm.c  30 Apr 2017 00:01:21 -0000
> > @@ -1334,7 +1334,9 @@ vcpu_readregs_vmx(struct vcpu *vcpu, uin
> > uint64_t sel, limit, ar;
> > uint64_t *gprs = vrs->vrs_gprs;
> > uint64_t *crs = vrs->vrs_crs;
> > +   uint64_t *msrs = vrs->vrs_msrs;
> > struct vcpu_segment_info *sregs = vrs->vrs_sregs;
> > +   struct vmx_msr_store *msr_store;
> > 
> > if (vcpu_reload_vmcs_vmx(&vcpu->vc_control_pa))
> > return (EINVAL);
> > @@ -1402,6 +1404,14 @@ vcpu_readregs_vmx(struct vcpu *vcpu, uin
> > goto errout;
> > }
> > 
> > +   msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
> > +
> > +   if (regmask & VM_RWREGS_MSRS) {
> > +   for (i = 0; i < VCPU_REGS_NMSRS; i++) {
> > +   msrs[i] = msr_store[i].vms_data;
> > +   }
> > +   }
> > +
> > goto out;
> > 
> > errout:
> > @@ -1448,7 +1458,9 @@ vcpu_writeregs_vmx(struct vcpu *vcpu, ui
> > uint64_t limit, ar;
> > uint64_t *gprs = vrs->vrs_gprs;
> > uint64_t *crs = vrs->vrs_crs;
> > +   uint64_t *msrs = vrs->vrs_msrs;
> > struct vcpu_segment_info *sregs = vrs->vrs_sregs;
> > +   struct vmx_msr_store *msr_store;
> > 
> > if (loadvmcs) {
> > if (vcpu_reload_vmcs_vmx(&vcpu->vc_control_pa))
> > @@ -1518,6 +1530,14 @@ vcpu_writeregs_vmx(struct vcpu *vcpu, ui
> > goto errout;
> > }
> > 
> > +   msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
> > +
> > +   if (regmask & VM_RWREGS_MSRS) {
> > +   for (i = 0; i < VCPU_REGS_NMSRS; i++) {
> > +   msr_store[i].vms_data = msrs[i];
> > +   }
> > +   }
> > +
> > goto out;
> > 
> > errout:
> > @@ -2128,7 +2148,7 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, s
> >  * IA32_VMX_LOAD_DEBUG_CONTROLS
> >  * IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY
> >  */
> > -   if (ug == 1)
> > +   if (ug == 1 && !(vrs->vrs_msrs[VCPU_REGS_EFER] & EFER_LMA))
> > want1 = 0;
> > else
> > want1 = IA32_VMX_IA32E_MODE_GUEST;
> > @@ -2277,26 +2297,12 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, s
> >  */
> > msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
> > 
> > -   /*
> > -* Make sure LME is enabled in EFER if restricted guest mode is
> > -* needed.
> > -*/
> > -   msr_store[0].vms_index = MSR_EFER;
> > -   if (ug == 1)
> > -   msr_store[0].vms_data = 0ULL;   /* Initial value */
> > -   else
> > -   msr_store[0].vms_data = EFER_LME;
> > -
> > -   msr_store[1].vms_index = MSR_STAR;
> > -   msr_store[1].vms_data = 0ULL;   /* Initial value */
> > -   msr_store[2].vms_index = MSR_LSTAR;
> > -   msr_store[2].vms_data = 0ULL;   /* Initial value */
> > -   msr_store[3].vms_index = MSR_CSTAR;
> > -   msr_store[3].vms_data = 0ULL;   /* Initial value */
> > -   msr_store[4].vms_index = MSR_SFMASK;
> > -   msr_store[4].vms_data = 0ULL;   /* Initial value 

Re: [PATCH] vmm: Add MSRs to readregs / writeregs

2017-05-01 Thread Mike Larkin
On Sat, Apr 29, 2017 at 05:24:22PM -0700, Pratik Vyas wrote:
> Hello tech@,
> 
> This is a patch that extends the readregs and writeregs vmm(4) ioctl to
> read and write MSRs as well.
> 
> It also sets the IA32_VMX_IA32E_MODE_GUEST entry control in
> vcpu_reset_regs based on the value of EFER_LMA.
> 
> There are changes to vmmvar.h, and building vmd(8) would require a
> `make includes` step. This should also not alter any behaviour of vmd(8).
> 
> Context: These are the kernel changes required to implement vmctl send
> and vmctl receive. vmctl send / receive are two new options that will
> support snapshotting VMs and migrating VMs from one host to another.
> This project was undertaken at San Jose State University along with my
> three teammates (CCed), with mlarkin@ as our advisor.
> 
> Patches to vmd(8) that implement vmctl send and vmctl receive are
> forthcoming.
> 
> 
> Thanks,
> Pratik
> 
> 

For some more color commentary - this diff is needed to provide vmd a way to
access the MSRs via the register set it sends to vmm for VM initialization.
Without that, the VM resumes with default/power-on state, and that won't work
to restore a VM that had already been running.
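
(For illustration, a sketch of what the userland side could look like,
assuming the vm_rwregs_params layout and vrwp_* field names from
vmmvar.h at the time; verify against the header before relying on it.)

#include <sys/types.h>
#include <sys/ioctl.h>
#include <machine/vmmvar.h>
#include <string.h>

/*
 * Sketch: fetch a vcpu's MSR block (here just EFER) via the extended
 * readregs ioctl.  The vrwp_* names are assumptions from vmmvar.h.
 */
int
read_vcpu_efer(int vmm_fd, uint32_t vm_id, uint32_t vcpu_id, uint64_t *efer)
{
	struct vm_rwregs_params vrp;

	memset(&vrp, 0, sizeof(vrp));
	vrp.vrwp_vm_id = vm_id;
	vrp.vrwp_vcpu_id = vcpu_id;
	vrp.vrwp_mask = VM_RWREGS_MSRS;		/* the new mask bit */

	if (ioctl(vmm_fd, VMM_IOC_READREGS, &vrp) == -1)
		return (-1);

	*efer = vrp.vrwp_regs.vrs_msrs[VCPU_REGS_EFER];
	return (0);
}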

I'll do some quick testing myself (I know this team has already tested OpenBSD
and Linux guests with this code, for both bios and non-bios boots) and if I
don't see any problems, I'll commit this tonight. Unless someone objects?

-ml


> 
> Index: sys/arch/amd64/amd64/vmm.c
> ===================================================================
> RCS file: /home/pdvyas/cvs/src/sys/arch/amd64/amd64/vmm.c,v
> retrieving revision 1.137
> diff -u -p -a -u -r1.137 vmm.c
> --- sys/arch/amd64/amd64/vmm.c  28 Apr 2017 10:09:37 -0000  1.137
> +++ sys/arch/amd64/amd64/vmm.c  30 Apr 2017 00:01:21 -0000
> @@ -1334,7 +1334,9 @@ vcpu_readregs_vmx(struct vcpu *vcpu, uin
>   uint64_t sel, limit, ar;
>   uint64_t *gprs = vrs->vrs_gprs;
>   uint64_t *crs = vrs->vrs_crs;
> + uint64_t *msrs = vrs->vrs_msrs;
>   struct vcpu_segment_info *sregs = vrs->vrs_sregs;
> + struct vmx_msr_store *msr_store;
> 
>   if (vcpu_reload_vmcs_vmx(&vcpu->vc_control_pa))
>   return (EINVAL);
> @@ -1402,6 +1404,14 @@ vcpu_readregs_vmx(struct vcpu *vcpu, uin
>   goto errout;
>   }
> 
> + msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
> +
> + if (regmask & VM_RWREGS_MSRS) {
> + for (i = 0; i < VCPU_REGS_NMSRS; i++) {
> + msrs[i] = msr_store[i].vms_data;
> + }
> + }
> +
>   goto out;
> 
> errout:
> @@ -1448,7 +1458,9 @@ vcpu_writeregs_vmx(struct vcpu *vcpu, ui
>   uint64_t limit, ar;
>   uint64_t *gprs = vrs->vrs_gprs;
>   uint64_t *crs = vrs->vrs_crs;
> + uint64_t *msrs = vrs->vrs_msrs;
>   struct vcpu_segment_info *sregs = vrs->vrs_sregs;
> + struct vmx_msr_store *msr_store;
> 
>   if (loadvmcs) {
>   if (vcpu_reload_vmcs_vmx(&vcpu->vc_control_pa))
> @@ -1518,6 +1530,14 @@ vcpu_writeregs_vmx(struct vcpu *vcpu, ui
>   goto errout;
>   }
> 
> + msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
> +
> + if (regmask & VM_RWREGS_MSRS) {
> + for (i = 0; i < VCPU_REGS_NMSRS; i++) {
> + msr_store[i].vms_data = msrs[i];
> + }
> + }
> +
>   goto out;
> 
> errout:
> @@ -2128,7 +2148,7 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, s
>* IA32_VMX_LOAD_DEBUG_CONTROLS
>* IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY
>*/
> - if (ug == 1)
> + if (ug == 1 && !(vrs->vrs_msrs[VCPU_REGS_EFER] & EFER_LMA))
>   want1 = 0;
>   else
>   want1 = IA32_VMX_IA32E_MODE_GUEST;
> @@ -2277,26 +2297,12 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, s
>*/
>   msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
> 
> - /*
> -  * Make sure LME is enabled in EFER if restricted guest mode is
> -  * needed.
> -  */
> - msr_store[0].vms_index = MSR_EFER;
> - if (ug == 1)
> - msr_store[0].vms_data = 0ULL;   /* Initial value */
> - else
> - msr_store[0].vms_data = EFER_LME;
> -
> - msr_store[1].vms_index = MSR_STAR;
> - msr_store[1].vms_data = 0ULL;   /* Initial value */
> - msr_store[2].vms_index = MSR_LSTAR;
> - msr_store[2].vms_data = 0ULL;   /* Initial value */
> - msr_store[3].vms_index = MSR_CSTAR;
> - msr_store[3].vms_data = 0ULL;   /* Initial value */
> - msr_store[4].vms_index = MSR_SFMASK;
> - msr_store[4].vms_data = 0ULL;   /* Initial value */
> - msr_store[5].vms_index = MSR_KERNELGSBASE;
> - msr_store[5].vms_data = 0ULL;   /* Initial value */
> + msr_store[VCPU_REGS_EFER].vms_index = MSR_EFER;
> + msr_store[VCPU_REGS_STAR].vms_index = MSR_STAR;
> + msr_store[VCPU_REGS_LSTAR].vms_index = MSR_LSTAR;
> + 

[PATCH] vmm: Add MSRs to readregs / writeregs

2017-04-29 Thread Pratik Vyas

Hello tech@,

This is a patch that extends the readregs and writeregs vmm(4) ioctl to
read and write MSRs as well.

It also sets the IA32_VMX_IA32E_MODE_GUEST entry control in
vcpu_reset_regs based on the value of EFER_LMA.
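
(As an illustrative sketch, not part of this diff: on restore, the saved
EFER, which has EFER_LMA set for a guest that was running in long mode,
can be staged in vrs_msrs before the writeregs ioctl, so that
vcpu_reset_regs picks the IA32E-mode entry control instead of the
power-on default.)

#include <machine/vmmvar.h>

/*
 * Illustrative only; the function name is made up.  Staging the
 * snapshot's EFER here is what lets vcpu_reset_regs_vmx set
 * IA32_VMX_IA32E_MODE_GUEST when EFER_LMA is present.
 */
static void
restore_saved_efer(struct vcpu_reg_state *vrs, uint64_t saved_efer)
{
	vrs->vrs_msrs[VCPU_REGS_EFER] = saved_efer;
}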

There are changes to vmmvar.h, and building vmd(8) would require a
`make includes` step. This should also not alter any behaviour of vmd(8).

Context: These are the kernel changes required to implement vmctl send
and vmctl receive. vmctl send / receive are two new options that will
support snapshotting VMs and migrating VMs from one host to another.
This project was undertaken at San Jose State University along with my
three teammates (CCed), with mlarkin@ as our advisor.

Patches to vmd(8) that implement vmctl send and vmctl receive are
forthcoming.


Thanks,
Pratik



Index: sys/arch/amd64/amd64/vmm.c
===================================================================
RCS file: /home/pdvyas/cvs/src/sys/arch/amd64/amd64/vmm.c,v
retrieving revision 1.137
diff -u -p -a -u -r1.137 vmm.c
--- sys/arch/amd64/amd64/vmm.c  28 Apr 2017 10:09:37 -0000  1.137
+++ sys/arch/amd64/amd64/vmm.c  30 Apr 2017 00:01:21 -0000
@@ -1334,7 +1334,9 @@ vcpu_readregs_vmx(struct vcpu *vcpu, uin
uint64_t sel, limit, ar;
uint64_t *gprs = vrs->vrs_gprs;
uint64_t *crs = vrs->vrs_crs;
+   uint64_t *msrs = vrs->vrs_msrs;
struct vcpu_segment_info *sregs = vrs->vrs_sregs;
+   struct vmx_msr_store *msr_store;

if (vcpu_reload_vmcs_vmx(&vcpu->vc_control_pa))
return (EINVAL);
@@ -1402,6 +1404,14 @@ vcpu_readregs_vmx(struct vcpu *vcpu, uin
goto errout;
}

+   msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
+
+   if (regmask & VM_RWREGS_MSRS) {
+   for (i = 0; i < VCPU_REGS_NMSRS; i++) {
+   msrs[i] = msr_store[i].vms_data;
+   }
+   }
+
goto out;

errout:
@@ -1448,7 +1458,9 @@ vcpu_writeregs_vmx(struct vcpu *vcpu, ui
uint64_t limit, ar;
uint64_t *gprs = vrs->vrs_gprs;
uint64_t *crs = vrs->vrs_crs;
+   uint64_t *msrs = vrs->vrs_msrs;
struct vcpu_segment_info *sregs = vrs->vrs_sregs;
+   struct vmx_msr_store *msr_store;

if (loadvmcs) {
if (vcpu_reload_vmcs_vmx(&vcpu->vc_control_pa))
@@ -1518,6 +1530,14 @@ vcpu_writeregs_vmx(struct vcpu *vcpu, ui
goto errout;
}

+   msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
+
+   if (regmask & VM_RWREGS_MSRS) {
+   for (i = 0; i < VCPU_REGS_NMSRS; i++) {
+   msr_store[i].vms_data = msrs[i];
+   }
+   }
+
goto out;

errout:
@@ -2128,7 +2148,7 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, s
 * IA32_VMX_LOAD_DEBUG_CONTROLS
 * IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY
 */
-   if (ug == 1)
+   if (ug == 1 && !(vrs->vrs_msrs[VCPU_REGS_EFER] & EFER_LMA))
want1 = 0;
else
want1 = IA32_VMX_IA32E_MODE_GUEST;
@@ -2277,26 +2297,12 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, s
 */
msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;

-   /*
-* Make sure LME is enabled in EFER if restricted guest mode is
-* needed.
-*/
-   msr_store[0].vms_index = MSR_EFER;
-   if (ug == 1)
-   msr_store[0].vms_data = 0ULL;   /* Initial value */
-   else
-   msr_store[0].vms_data = EFER_LME;
-
-   msr_store[1].vms_index = MSR_STAR;
-   msr_store[1].vms_data = 0ULL;   /* Initial value */
-   msr_store[2].vms_index = MSR_LSTAR;
-   msr_store[2].vms_data = 0ULL;   /* Initial value */
-   msr_store[3].vms_index = MSR_CSTAR;
-   msr_store[3].vms_data = 0ULL;   /* Initial value */
-   msr_store[4].vms_index = MSR_SFMASK;
-   msr_store[4].vms_data = 0ULL;   /* Initial value */
-   msr_store[5].vms_index = MSR_KERNELGSBASE;
-   msr_store[5].vms_data = 0ULL;   /* Initial value */
+   msr_store[VCPU_REGS_EFER].vms_index = MSR_EFER;
+   msr_store[VCPU_REGS_STAR].vms_index = MSR_STAR;
+   msr_store[VCPU_REGS_LSTAR].vms_index = MSR_LSTAR;
+   msr_store[VCPU_REGS_CSTAR].vms_index = MSR_CSTAR;
+   msr_store[VCPU_REGS_SFMASK].vms_index = MSR_SFMASK;
+   msr_store[VCPU_REGS_KGSBASE].vms_index = MSR_KERNELGSBASE;

/*
 * Currently we have the same count of entry/exit MSRs loads/stores
@@ -2359,6 +2365,13 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, s
ret = vcpu_writeregs_vmx(vcpu, VM_RWREGS_ALL, 0, vrs);

/*
+* Make sure LME is enabled in EFER if restricted guest mode is
+* needed.
+*/
+   if (ug == 0)
+   msr_store[VCPU_REGS_EFER].vms_data |= EFER_LME;
+
+   /*
 * Set up the MSR bitmap
 */
memset((uint8_t *)vcpu->vc_msr_bitmap_va, 0xFF,