On Fri, Feb 07, 2020 at 01:25:38PM -0800, Mike Larkin wrote:
> On Fri, Feb 07, 2020 at 04:20:16AM +0000, Adam Steen wrote:
> > Hi
> >
> > Please see the attached patch to add an 'IOCTL handler to set the access
> > protections of the ept'
> >
> > vmd(8) does not make use of this change, but solo5, which uses vmm(4) as
> > a backend hypervisor. The code calling 'VMM_IOC_MPROTECT_EPT' is
> > available here
> > https://github.com/Solo5/solo5/compare/master...adamsteen:wnox
> >
> > there are changes to vmd too, but this is just to ensure completeness,
> > if mprotect ept is called in the future, we would want the vm to be
> > stopped if we get a protection fault.
> >
> > I was unsure what to do if called with execute-only permissions on a cpu that
> > does not support it. I went with adding read permissions and logging the
> > fact, instead of returning EINVAL.
> >
> > Cheers
> > Adam
> >
>
> I have been giving Adam feedback on this diff for a while. There are a few
> minor comments below, but I think this is ok if someone wants to commit it
> after
> the fixes below are incorporated.
>
> -ml
>
See updated comment below.
-ml
> > ? div
> > Index: sys/arch/amd64/amd64/vmm.c
> > ===================================================================
> > RCS file: /cvs/src/sys/arch/amd64/amd64/vmm.c,v
> > retrieving revision 1.258
> > diff -u -p -u -p -r1.258 vmm.c
> > --- sys/arch/amd64/amd64/vmm.c 31 Jan 2020 01:51:27 -0000 1.258
> > +++ sys/arch/amd64/amd64/vmm.c 7 Feb 2020 03:15:16 -0000
> > @@ -124,6 +124,7 @@ int vm_get_info(struct vm_info_params *)
> > int vm_resetcpu(struct vm_resetcpu_params *);
> > int vm_intr_pending(struct vm_intr_params *);
> > int vm_rwregs(struct vm_rwregs_params *, int);
> > +int vm_mprotect_ept(struct vm_mprotect_ept_params *);
> > int vm_rwvmparams(struct vm_rwvmparams_params *, int);
> > int vm_find(uint32_t, struct vm **);
> > int vcpu_readregs_vmx(struct vcpu *, uint64_t, struct vcpu_reg_state *);
> > @@ -186,6 +187,8 @@ int svm_fault_page(struct vcpu *, paddr_
> > int vmx_fault_page(struct vcpu *, paddr_t);
> > int vmx_handle_np_fault(struct vcpu *);
> > int svm_handle_np_fault(struct vcpu *);
> > +int vmx_mprotect_ept(vm_map_t, paddr_t, paddr_t, int);
> > +pt_entry_t *vmx_pmap_find_pte_ept(pmap_t, paddr_t);
> > int vmm_alloc_vpid(uint16_t *);
> > void vmm_free_vpid(uint16_t);
> > const char *vcpu_state_decode(u_int);
> > @@ -493,6 +496,9 @@ vmmioctl(dev_t dev, u_long cmd, caddr_t
> > case VMM_IOC_WRITEREGS:
> > ret = vm_rwregs((struct vm_rwregs_params *)data, 1);
> > break;
> > + case VMM_IOC_MPROTECT_EPT:
> > + ret = vm_mprotect_ept((struct vm_mprotect_ept_params *)data);
> > + break;
> > case VMM_IOC_READVMPARAMS:
> > ret = vm_rwvmparams((struct vm_rwvmparams_params *)data, 0);
> > break;
> > @@ -531,6 +537,7 @@ pledge_ioctl_vmm(struct proc *p, long co
> > case VMM_IOC_INTR:
> > case VMM_IOC_READREGS:
> > case VMM_IOC_WRITEREGS:
> > + case VMM_IOC_MPROTECT_EPT:
> > case VMM_IOC_READVMPARAMS:
> > case VMM_IOC_WRITEVMPARAMS:
> > return (0);
> > @@ -806,6 +813,288 @@ vm_rwregs(struct vm_rwregs_params *vrwp,
> > }
> >
> > /*
> > + * vm_mprotect_ept
> > + *
> > + * IOCTL handler to set the access protections of the ept
> > + *
> > + * Parameters:
> > + * vmep: describes the memory for which the protection will be applied.
> > + *
> > + * Return values:
> > + * 0: if successful
> > + * ENOENT: if the VM defined by 'vmep' cannot be found
> > + * EINVAL: if the sgpa or size is not page aligned, the prot is invalid,
> > + * size is too large (512GB), there is wraparound
> > + * (like start = 512GB-1 and end = 512GB-2),
> > + * the address specified is not within the vm's mem range
> > + * or the address lies inside reserved (MMIO) memory
> > + */
> > +int
> > +vm_mprotect_ept(struct vm_mprotect_ept_params *vmep)
> > +{
> > + struct vm *vm;
> > + struct vcpu *vcpu;
> > + vaddr_t sgpa;
> > + size_t size;
> > + vm_prot_t prot;
> > + uint64_t msr;
> > + int ret, memtype;
> > +
> > + /* If not EPT or RVI, nothing to do here */
> > + if (!(vmm_softc->mode == VMM_MODE_EPT
> > + || vmm_softc->mode == VMM_MODE_RVI))
> > + return (0);
> > +
> > + /* Find the desired VM */
> > + rw_enter_read(&vmm_softc->vm_lock);
> > + ret = vm_find(vmep->vmep_vm_id, &vm);
> > + rw_exit_read(&vmm_softc->vm_lock);
> > +
> > + /* Not found? exit. */
> > + if (ret != 0) {
> > + DPRINTF("%s: vm id %u not found\n", __func__,
> > + vmep->vmep_vm_id);
> > + return (ret);
> > + }
> > +
> > + rw_enter_read(&vm->vm_vcpu_lock);
> > + SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
> > + if (vcpu->vc_id == vmep->vmep_vcpu_id)
> > + break;
> > + }
> > + rw_exit_read(&vm->vm_vcpu_lock);
> > +
> > + if (vcpu == NULL) {
> > + DPRINTF("%s: vcpu id %u of vm %u not found\n", __func__,
> > + vmep->vmep_vcpu_id, vmep->vmep_vm_id);
> > + return (ENOENT);
> > + }
> > +
> > + if (vcpu->vc_state != VCPU_STATE_STOPPED) {
> > + DPRINTF("%s: mprotect_ept %u on vm %u attempted "
> > + "while vcpu was in state %u (%s)\n", __func__,
> > + vmep->vmep_vcpu_id, vmep->vmep_vm_id, vcpu->vc_state,
> > + vcpu_state_decode(vcpu->vc_state));
> > +
> > + return (EBUSY);
> > + }
> > +
> > + /* Only proceed if the pmap is in the correct mode */
> > + KASSERT((vmm_softc->mode == VMM_MODE_EPT &&
> > + vm->vm_map->pmap->pm_type == PMAP_TYPE_EPT) ||
> > + (vmm_softc->mode == VMM_MODE_RVI &&
> > + vm->vm_map->pmap->pm_type == PMAP_TYPE_RVI));
> > +
> > + sgpa = vmep->vmep_sgpa;
> > + size = vmep->vmep_size;
> > + prot = vmep->vmep_prot;
> > +
> > + /* No W^X permissions */
> > + if ((prot & PROT_MASK) != prot &&
> > + (prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC)) {
> > + DPRINTF("%s: No W^X permissions\n", __func__);
> > + return (EINVAL);
> > + }
>
> I would probably reword this to "W+X permission requested".
>
> > +
> > + /* No Write only permissions */
> > + if ((prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) == PROT_WRITE) {
> > + DPRINTF("%s: No Write only permissions\n", __func__);
> > + return (EINVAL);
> > + }
> > +
> > + /* No empty permissions */
> > + if (prot == 0) {
> > + DPRINTF("%s: No empty permissions\n", __func__);
> > + return (EINVAL);
> > + }
> > +
> > + /* No execute only on EPT CPUs that don't have that capability */
> > + if (vmm_softc->mode == VMM_MODE_EPT) {
> > + msr = rdmsr(IA32_VMX_EPT_VPID_CAP);
> > + if (prot == PROT_EXEC &&
> > + (msr & IA32_EPT_VPID_CAP_XO_TRANSLATIONS)) {
> > + printf("%s: Execute only permissions unsupported,"
> > + " adding read permission\n", __func__);
> > + /* XXX should this return (EINVAL) */
> > +
> > + prot |= PROT_READ;
> > + }
> > + }
> > +
> > + /* Must be page aligned */
> > + if ((sgpa & PAGE_MASK) || (size & PAGE_MASK) || size == 0)
> > + return (EINVAL);
> > +
> > + /* size must be less than 512GB */
> > + if (size >= NBPD_L4)
> > + return (EINVAL);
> > +
> > + /* no wraparound */
> > + if (sgpa + size < sgpa)
> > + return (EINVAL);
> > +
> > + /*
> > + * Specifying addresses within the PCI MMIO space is forbidden.
> > + * Disallow addresses that start inside the MMIO space:
> > + * [VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END]
> > + */
> > + if (sgpa >= VMM_PCI_MMIO_BAR_BASE && sgpa <= VMM_PCI_MMIO_BAR_END)
> > + return (EINVAL);
> > +
> > + /*
> > + * ... and disallow addresses that end inside the MMIO space:
> > + * (VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END]
> > + */
> > + if (sgpa + size > VMM_PCI_MMIO_BAR_BASE &&
> > + sgpa + size <= VMM_PCI_MMIO_BAR_END)
> > + return (EINVAL);
> > +
> > + memtype = vmm_get_guest_memtype(vm, sgpa);
> > + if (memtype == VMM_MEM_TYPE_UNKNOWN)
> > + return (EINVAL);
> > +
> > + if (vmm_softc->mode == VMM_MODE_EPT)
> > + ret = vmx_mprotect_ept(vm->vm_map, sgpa, sgpa + size, prot);
> > + else if (vmm_softc->mode == VMM_MODE_RVI) {
> > + pmap_write_protect(vm->vm_map->pmap, sgpa, sgpa + size, prot);
> > + ret = 0;
> > + } else
> > + return (EINVAL);
> > +
> > + return (ret);
> > +}
> > +
> > +/*
> > + * vmx_mprotect_ept
> > + *
> > + * apply the ept protections to the requested pages, faulting the page if
>
> "faulting in"
>
> > + * required.
> > + */
> > +int
> > +vmx_mprotect_ept(vm_map_t vm_map, paddr_t sgpa, paddr_t egpa, int prot)
> > +{
> > + struct vmx_invept_descriptor vid;
> > + pmap_t pmap;
> > + pt_entry_t *pte;
> > + paddr_t addr;
> > + int ret = 0;
> > +
> > + pmap = vm_map->pmap;
> > +
> > + for (addr = sgpa; addr < egpa; addr += PAGE_SIZE) {
> > + pte = vmx_pmap_find_pte_ept(pmap, addr);
> > + if (pte == NULL) {
>
> if (pte & PG_V) == 0
>
After reading a reply from Adam, I think the original suggested way is fine.
My idea of checking PG_V is fine for RWX EPT entries, but as soon as anyone
uses XO entries, this check wouldn't work.
-ml
> > + ret = uvm_fault(vm_map, addr, VM_FAULT_INVALID,
> > + PROT_READ | PROT_WRITE | PROT_EXEC);
> > + if (ret)
> > + printf("%s: uvm_fault returns %d, GPA=0x%llx\n",
> > + __func__, ret, (uint64_t)addr);
> > +
> > + pte = vmx_pmap_find_pte_ept(pmap, addr);
> > + if (pte == NULL)
> > + return EFAULT;
> > + }
> > +
> > + if (prot & PROT_READ)
> > + *pte |= EPT_R;
> > + else
> > + *pte &= ~EPT_R;
> > +
> > + if (prot & PROT_WRITE)
> > + *pte |= EPT_W;
> > + else
> > + *pte &= ~EPT_W;
> > +
> > + if (prot & PROT_EXEC)
> > + *pte |= EPT_X;
> > + else
> > + *pte &= ~EPT_X;
> > + }
> > +
> > + /*
> > + * SDM 3C: 28.3.3.4 Guidelines for Use of the INVEPT Instruction
> > + * the first bullet point seems to say we should call invept.
> > + *
> > + * Software should use the INVEPT instruction with the “single-context”
> > + * INVEPT type after making any of the following changes to an EPT
> > + * paging-structure entry (the INVEPT descriptor should contain an
> > + * EPTP value that references — directly or indirectly
> > + * — the modified EPT paging structure):
> > + * — Changing any of the privilege bits 2:0 from 1 to 0.
> > + * */
> > + if (pmap->eptp != 0) {
> > + memset(&vid, 0, sizeof(vid));
> > + vid.vid_eptp = pmap->eptp;
> > + DPRINTF("%s: flushing EPT TLB for EPTP 0x%llx\n", __func__,
> > + vid.vid_eptp);
> > + invept(IA32_VMX_INVEPT_SINGLE_CTX, &vid);
> > + }
> > +
> > + return ret;
> > +}
>
> The previous block is not technically correct, but it's as good as it will
> get until we get the larger "proper INVEPT" diff in that I have stashed in
> one of my trees. For now, this is ok. The reason it's not correct as it does
> not consider the case where the ioctl is being called on one CPU while the
> last
> CPU to run the VM might be a different one - in that case, we need to send an
> EPT flush IPI to the other cpu. Note that this problem exists in vmm(4) today
> anyway, even without this diff. Another possible fix for the general problem
> would be to flush the EPT TLB on each vmentry, but that would impact
> performance
> in a negative way.
>
> > +
> > +/*
> > + * vmx_pmap_find_pte_ept
> > + *
> > + * find the page table entry specified by addr in the pmap supplied.
> > + */
> > +pt_entry_t *
> > +vmx_pmap_find_pte_ept(pmap_t pmap, paddr_t addr)
> > +{
> > + int l4idx, l3idx, l2idx, l1idx;
> > + pd_entry_t *pd;
> > + paddr_t pdppa;
> > + pt_entry_t *ptes, *pte;
> > +
> > + l4idx = (addr & L4_MASK) >> L4_SHIFT; /* PML4E idx */
> > + l3idx = (addr & L3_MASK) >> L3_SHIFT; /* PDPTE idx */
> > + l2idx = (addr & L2_MASK) >> L2_SHIFT; /* PDE idx */
> > + l1idx = (addr & L1_MASK) >> L1_SHIFT; /* PTE idx */
> > +
> > + pd = (pd_entry_t *)pmap->pm_pdir;
> > + if (pd == NULL)
> > + return NULL;
> > +
> > + /*
> > + * l4idx should always be 0 since we don't support more than 512GB
> > + * guest physical memory.
> > + */
> > + if (l4idx > 0)
> > + return NULL;
> > +
> > + /*
> > + * l3idx should always be < MAXDSIZ/1GB because we don't support more
> > + * than MAXDSIZ guest phys mem.
> > + */
> > + if (l3idx >= MAXDSIZ / ((paddr_t)1024*1024*1024))
>
> Spaces around *s
>
> > + return NULL;
> > +
> > + pdppa = pd[l4idx] & PG_FRAME;
> > + if (pdppa == 0)
> > + return NULL;
> > +
> > + ptes = (pt_entry_t *)PMAP_DIRECT_MAP(pdppa);
> > +
> > + pdppa = ptes[l3idx] & PG_FRAME;
> > + if (pdppa == 0)
> > + return NULL;
> > +
> > + ptes = (pt_entry_t *)PMAP_DIRECT_MAP(pdppa);
> > +
> > + pdppa = ptes[l2idx] & PG_FRAME;
> > + if (pdppa == 0)
> > + return NULL;
> > +
> > + ptes = (pt_entry_t *)PMAP_DIRECT_MAP(pdppa);
> > +
> > + pte = &ptes[l1idx];
> > + if (*pte == 0)
> > + return NULL;
> > +
> > + return pte;
> > +}
> > +
> > +/*
> > * vm_find
> > *
> > * Function to find an existing VM by its identifier.
> > @@ -5099,19 +5388,35 @@ vmx_get_exit_qualification(uint64_t *exi
> > int
> > vmx_get_guest_faulttype(void)
> > {
> > - uint64_t exit_qualification;
> > + uint64_t exit_qual;
> > uint64_t presentmask = IA32_VMX_EPT_FAULT_WAS_READABLE |
> > IA32_VMX_EPT_FAULT_WAS_WRITABLE | IA32_VMX_EPT_FAULT_WAS_EXECABLE;
> > - uint64_t protmask = IA32_VMX_EPT_FAULT_READ |
> > - IA32_VMX_EPT_FAULT_WRITE | IA32_VMX_EPT_FAULT_EXEC;
> > + vm_prot_t prot, was_prot;
> >
> > - if (vmx_get_exit_qualification(&exit_qualification))
> > + if (vmx_get_exit_qualification(&exit_qual))
> > return (-1);
> >
> > - if ((exit_qualification & presentmask) == 0)
> > + if ((exit_qual & presentmask) == 0)
> > return VM_FAULT_INVALID;
> > - if (exit_qualification & protmask)
> > +
> > + was_prot = 0;
> > + if (exit_qual & IA32_VMX_EPT_FAULT_WAS_READABLE)
> > + was_prot |= PROT_READ;
> > + if (exit_qual & IA32_VMX_EPT_FAULT_WAS_WRITABLE)
> > + was_prot |= PROT_WRITE;
> > + if (exit_qual & IA32_VMX_EPT_FAULT_WAS_EXECABLE)
> > + was_prot |= PROT_EXEC;
> > +
> > + if (exit_qual & IA32_VMX_EPT_FAULT_READ)
> > + prot = PROT_READ;
> > + else if (exit_qual & IA32_VMX_EPT_FAULT_WRITE)
> > + prot = PROT_WRITE;
> > + else if (exit_qual & IA32_VMX_EPT_FAULT_EXEC)
> > + prot = PROT_EXEC;
> > +
> > + if ((was_prot & prot) == 0)
> > return VM_FAULT_PROTECT;
> > +
> > return (-1);
> > }
> >
> > @@ -5188,6 +5493,9 @@ svm_handle_np_fault(struct vcpu *vcpu)
> > *
> > * Request a new page to be faulted into the UVM map of the VM owning
> > 'vcpu'
> > * at address 'gpa'.
> > + *
> > + * Returns EAGAIN to indicate a protection fault, i.e. writing to a
> > + * read-only page.
> > */
> > int
> > vmx_fault_page(struct vcpu *vcpu, paddr_t gpa)
> > @@ -5198,6 +5506,11 @@ vmx_fault_page(struct vcpu *vcpu, paddr_
> > if (fault_type == -1) {
> > printf("%s: invalid fault type\n", __func__);
> > return (EINVAL);
> > + }
> > +
> > + if (fault_type == VM_FAULT_PROTECT) {
> > + vcpu->vc_exit.vee.vee_fault_type = VEE_FAULT_PROTECT;
> > + return (EAGAIN);
> > }
> >
> > ret = uvm_fault(vcpu->vc_parent->vm_map, gpa, fault_type,
> > Index: sys/arch/amd64/include/specialreg.h
> > ===================================================================
> > RCS file: /cvs/src/sys/arch/amd64/include/specialreg.h,v
> > retrieving revision 1.86
> > diff -u -p -u -p -r1.86 specialreg.h
> > --- sys/arch/amd64/include/specialreg.h 20 Dec 2019 07:49:31 -0000
> > 1.86
> > +++ sys/arch/amd64/include/specialreg.h 7 Feb 2020 03:15:17 -0000
> > @@ -954,9 +954,10 @@
> > #define IA32_VMX_TRUE_ENTRY_CTLS 0x490
> > #define IA32_VMX_VMFUNC 0x491
> >
> > -#define IA32_EPT_VPID_CAP_PAGE_WALK_4 (1ULL << 6)
> > -#define IA32_EPT_VPID_CAP_WB (1ULL << 14)
> > -#define IA32_EPT_VPID_CAP_AD_BITS (1ULL << 21)
> > +#define IA32_EPT_VPID_CAP_XO_TRANSLATIONS 0x0
> > +#define IA32_EPT_VPID_CAP_PAGE_WALK_4 (1ULL << 6)
> > +#define IA32_EPT_VPID_CAP_WB (1ULL << 14)
> > +#define IA32_EPT_VPID_CAP_AD_BITS (1ULL << 21)
> >
> > #define IA32_EPT_PAGING_CACHE_TYPE_UC 0x0
> > #define IA32_EPT_PAGING_CACHE_TYPE_WB 0x6
> > Index: sys/arch/amd64/include/vmmvar.h
> > ===================================================================
> > RCS file: /cvs/src/sys/arch/amd64/include/vmmvar.h,v
> > retrieving revision 1.68
> > diff -u -p -u -p -r1.68 vmmvar.h
> > --- sys/arch/amd64/include/vmmvar.h 22 Jan 2020 03:29:58 -0000 1.68
> > +++ sys/arch/amd64/include/vmmvar.h 7 Feb 2020 03:15:17 -0000
> > @@ -322,6 +322,10 @@ enum {
> > };
> >
> > enum {
> > + VEE_FAULT_PROTECT
> > +};
> > +
> > +enum {
> > VMM_CPU_MODE_REAL,
> > VMM_CPU_MODE_PROT,
> > VMM_CPU_MODE_PROT32,
> > @@ -350,6 +354,12 @@ struct vm_exit_inout {
> > uint16_t vei_port; /* port */
> > uint32_t vei_data; /* data */
> > };
> > +/*
> > + * vm_exit_eptviolation : describes an EPT VIOLATION exit
> > + */
> > +struct vm_exit_eptviolation {
> > + uint8_t vee_fault_type;
> > +};
> >
> > /*
> > * struct vcpu_segment_info
> > @@ -447,7 +457,8 @@ struct vm_mem_range {
> > */
> > struct vm_exit {
> > union {
> > - struct vm_exit_inout vei; /* IN/OUT exit */
> > + struct vm_exit_inout vei; /* IN/OUT exit */
> > + struct vm_exit_eptviolation vee; /* EPT VIOLATION exit*/
> > };
> >
> > struct vcpu_reg_state vrs;
> > @@ -558,6 +569,15 @@ struct vm_rwregs_params {
> > struct vcpu_reg_state vrwp_regs;
> > };
> >
> > +struct vm_mprotect_ept_params {
> > + /* Input parameters to VMM_IOC_MPROTECT_EPT */
> > + uint32_t vmep_vm_id;
> > + uint32_t vmep_vcpu_id;
> > + vaddr_t vmep_sgpa;
> > + size_t vmep_size;
> > + int vmep_prot;
> > +};
> > +
> > /* IOCTL definitions */
> > #define VMM_IOC_CREATE _IOWR('V', 1, struct vm_create_params) /* Create VM
> > */
> > #define VMM_IOC_RUN _IOWR('V', 2, struct vm_run_params) /* Run VCPU */
> > @@ -571,7 +591,8 @@ struct vm_rwregs_params {
> > #define VMM_IOC_READVMPARAMS _IOWR('V', 9, struct vm_rwvmparams_params)
> > /* Set VM params */
> > #define VMM_IOC_WRITEVMPARAMS _IOW('V', 10, struct vm_rwvmparams_params)
> > -
> > +/* Control the protection of ept pages */
> > +#define VMM_IOC_MPROTECT_EPT _IOW('V', 11, struct vm_mprotect_ept_params)
> >
> > /* CPUID masks */
> > /*
> > Index: usr.sbin/vmd/vm.c
> > ===================================================================
> > RCS file: /cvs/src/usr.sbin/vmd/vm.c,v
> > retrieving revision 1.54
> > diff -u -p -u -p -r1.54 vm.c
> > --- usr.sbin/vmd/vm.c 11 Dec 2019 06:45:16 -0000 1.54
> > +++ usr.sbin/vmd/vm.c 7 Feb 2020 03:15:23 -0000
> > @@ -81,6 +81,7 @@ void init_emulated_hw(struct vmop_create
> > void restore_emulated_hw(struct vm_create_params *, int, int *,
> > int[][VM_MAX_BASE_PER_DISK],int);
> > void vcpu_exit_inout(struct vm_run_params *);
> > +int vcpu_exit_eptviolation(struct vm_run_params *);
> > uint8_t vcpu_exit_pci(struct vm_run_params *);
> > int vcpu_pic_intr(uint32_t, uint32_t, uint8_t);
> > int loadfile_bios(FILE *, struct vcpu_reg_state *);
> > @@ -1600,6 +1601,38 @@ vcpu_exit_inout(struct vm_run_params *vr
> > }
> >
> > /*
> > + * vcpu_exit_eptviolation
> > + *
> > + * handle an EPT Violation
> > + *
> > + *
> > + * Parameters:
> > + * vrp: vcpu run parameters containing guest state for this exit
> > + *
> > + * Return values:
> > + * 0: no action required
> > + * EAGAIN: a protection fault occurred, kill the vm.
> > + */
> > +int
> > +vcpu_exit_eptviolation(struct vm_run_params *vrp)
> > +{
> > + struct vm_exit *ve = vrp->vrp_exit;
> > + /*
> > + * vmm may be exiting to vmd to handle a pending interrupt
> > + * but the last exit type may have been VMX_EXIT_EPT_VIOLATION,
> > + * check the fault_type to ensure we really are processing
> > + * a VMX_EXIT_EPT_VIOLATION.
> > + */
> > + if (ve->vee.vee_fault_type == VEE_FAULT_PROTECT) {
> > + log_debug("%s: EPT Violation: rip=0x%llx",
> > + __progname, vrp->vrp_exit->vrs.vrs_gprs[VCPU_REGS_RIP]);
> > + return (EAGAIN);
> > + }
> > +
> > + return (0);
> > +}
> > +
> > +/*
> > * vcpu_exit
> > *
> > * Handle a vcpu exit. This function is called when it is determined that
> > @@ -1629,7 +1662,6 @@ vcpu_exit(struct vm_run_params *vrp)
> > case VMX_EXIT_CPUID:
> > case VMX_EXIT_EXTINT:
> > case SVM_VMEXIT_INTR:
> > - case VMX_EXIT_EPT_VIOLATION:
> > case SVM_VMEXIT_NPF:
> > case SVM_VMEXIT_MSR:
> > case SVM_VMEXIT_CPUID:
> > @@ -1640,6 +1672,12 @@ vcpu_exit(struct vm_run_params *vrp)
> > * here (and falling through to the default case below results
> > * in more vmd log spam).
> > */
> > + break;
> > + case VMX_EXIT_EPT_VIOLATION:
> > + ret = vcpu_exit_eptviolation(vrp);
> > + if (ret)
> > + return (ret);
> > +
> > break;
> > case VMX_EXIT_IO:
> > case SVM_VMEXIT_IOIO:
> >