On Fri, Feb 07, 2020 at 01:25:38PM -0800, Mike Larkin wrote:
> On Fri, Feb 07, 2020 at 04:20:16AM +0000, Adam Steen wrote:
> > Hi
> > 
> > Please see the attached patch to add an 'IOCTL handler to set the access
> > protections of the EPT'.
> > 
> > vmd(8) does not make use of this change, but solo5, which uses vmm(4) as
> > its backend hypervisor, does. The code calling 'VMM_IOC_MPROTECT_EPT' is
> > available here 
> > https://github.com/Solo5/solo5/compare/master...adamsteen:wnox
> > 
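> > For illustration, here is a minimal sketch of how a userland backend could
> > call the new ioctl. The helper name and the vm/vcpu ids and address range
> > are placeholders, not code from the solo5 branch; only the struct, the
> > ioctl name and /dev/vmm come from the diff itself.
> > 
> >     #include <sys/types.h>
> >     #include <sys/ioctl.h>
> >     #include <sys/mman.h>
> >     #include <machine/vmmvar.h>
> >     #include <err.h>
> >     #include <stdint.h>
> >     #include <string.h>
> > 
> >     /* Hypothetical helper: make a range of guest pages read/execute only. */
> >     static void
> >     guest_mprotect_rx(int vmm_fd, uint32_t vm_id, uint32_t vcpu_id,
> >         vaddr_t sgpa, size_t size)
> >     {
> >             struct vm_mprotect_ept_params vmep;
> > 
> >             memset(&vmep, 0, sizeof(vmep));
> >             vmep.vmep_vm_id = vm_id;
> >             vmep.vmep_vcpu_id = vcpu_id;
> >             vmep.vmep_sgpa = sgpa;    /* page aligned guest physical address */
> >             vmep.vmep_size = size;    /* page aligned, non-zero, < 512GB */
> >             vmep.vmep_prot = PROT_READ | PROT_EXEC;
> > 
> >             /* vmm_fd is the descriptor from open("/dev/vmm", O_RDWR) */
> >             if (ioctl(vmm_fd, VMM_IOC_MPROTECT_EPT, &vmep) == -1)
> >                     err(1, "VMM_IOC_MPROTECT_EPT");
> >     }
> > 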
> > There are changes to vmd too, but those are just for completeness: if
> > mprotect ept is used in the future, we would want the vm to be stopped
> > when we get a protection fault.
> > 
> > I was unsure what to do if called with execute-only permissions on a cpu
> > that does not support them. I went with adding read permission and logging
> > the fact, instead of returning EINVAL.
> > 
> > Cheers
> > Adam
> > 
> 
> I have been giving Adam feedback on this diff for a while. There are a few
> minor comments below, but I think this is ok if someone wants to commit it
> after the fixes below are incorporated.
> 
> -ml
> 

See updated comment below.

-ml

> > ? div
> > Index: sys/arch/amd64/amd64/vmm.c
> > ===================================================================
> > RCS file: /cvs/src/sys/arch/amd64/amd64/vmm.c,v
> > retrieving revision 1.258
> > diff -u -p -u -p -r1.258 vmm.c
> > --- sys/arch/amd64/amd64/vmm.c      31 Jan 2020 01:51:27 -0000      1.258
> > +++ sys/arch/amd64/amd64/vmm.c      7 Feb 2020 03:15:16 -0000
> > @@ -124,6 +124,7 @@ int vm_get_info(struct vm_info_params *)
> >  int vm_resetcpu(struct vm_resetcpu_params *);
> >  int vm_intr_pending(struct vm_intr_params *);
> >  int vm_rwregs(struct vm_rwregs_params *, int);
> > +int vm_mprotect_ept(struct vm_mprotect_ept_params *);
> >  int vm_rwvmparams(struct vm_rwvmparams_params *, int);
> >  int vm_find(uint32_t, struct vm **);
> >  int vcpu_readregs_vmx(struct vcpu *, uint64_t, struct vcpu_reg_state *);
> > @@ -186,6 +187,8 @@ int svm_fault_page(struct vcpu *, paddr_
> >  int vmx_fault_page(struct vcpu *, paddr_t);
> >  int vmx_handle_np_fault(struct vcpu *);
> >  int svm_handle_np_fault(struct vcpu *);
> > +int vmx_mprotect_ept(vm_map_t, paddr_t, paddr_t, int);
> > +pt_entry_t *vmx_pmap_find_pte_ept(pmap_t, paddr_t);
> >  int vmm_alloc_vpid(uint16_t *);
> >  void vmm_free_vpid(uint16_t);
> >  const char *vcpu_state_decode(u_int);
> > @@ -493,6 +496,9 @@ vmmioctl(dev_t dev, u_long cmd, caddr_t 
> >     case VMM_IOC_WRITEREGS:
> >             ret = vm_rwregs((struct vm_rwregs_params *)data, 1);
> >             break;
> > +   case VMM_IOC_MPROTECT_EPT:
> > +           ret = vm_mprotect_ept((struct vm_mprotect_ept_params *)data);
> > +           break;
> >     case VMM_IOC_READVMPARAMS:
> >             ret = vm_rwvmparams((struct vm_rwvmparams_params *)data, 0);
> >             break;
> > @@ -531,6 +537,7 @@ pledge_ioctl_vmm(struct proc *p, long co
> >     case VMM_IOC_INTR:
> >     case VMM_IOC_READREGS:
> >     case VMM_IOC_WRITEREGS:
> > +   case VMM_IOC_MPROTECT_EPT:
> >     case VMM_IOC_READVMPARAMS:
> >     case VMM_IOC_WRITEVMPARAMS:
> >             return (0);
> > @@ -806,6 +813,288 @@ vm_rwregs(struct vm_rwregs_params *vrwp,
> >  }
> >  
> >  /*
> > + * vm_mprotect_ept
> > + *
> > + * IOCTL handler to set the access protections of the EPT
> > + *
> > + * Parameters:
> > + *   vmep: describes the memory for which the protection will be applied.
> > + *
> > + * Return values:
> > + *  0: if successful
> > + *  ENOENT: if the VM defined by 'vmep' cannot be found
> > + *  EINVAL: if the sgpa or size is not page aligned, the prot is invalid,
> > + *          size is too large (512GB), there is wraparound
> > + *          (like start = 512GB-1 and end = 512GB-2),
> > + *          the address specified is not within the vm's mem range
> > + *          or the address lies inside reserved (MMIO) memory
> > + */
> > +int
> > +vm_mprotect_ept(struct vm_mprotect_ept_params *vmep)
> > +{
> > +   struct vm *vm;
> > +   struct vcpu *vcpu;
> > +   vaddr_t sgpa;
> > +   size_t size;
> > +   vm_prot_t prot;
> > +   uint64_t msr;
> > +   int ret, memtype;
> > +
> > +   /* If not EPT or RVI, nothing to do here */
> > +   if (!(vmm_softc->mode == VMM_MODE_EPT
> > +       || vmm_softc->mode == VMM_MODE_RVI))
> > +           return (0);
> > +
> > +   /* Find the desired VM */
> > +   rw_enter_read(&vmm_softc->vm_lock);
> > +   ret = vm_find(vmep->vmep_vm_id, &vm);
> > +   rw_exit_read(&vmm_softc->vm_lock);
> > +
> > +   /* Not found? exit. */
> > +   if (ret != 0) {
> > +           DPRINTF("%s: vm id %u not found\n", __func__,
> > +               vmep->vmep_vm_id);
> > +           return (ret);
> > +   }
> > +
> > +   rw_enter_read(&vm->vm_vcpu_lock);
> > +   SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
> > +           if (vcpu->vc_id == vmep->vmep_vcpu_id)
> > +                   break;
> > +   }
> > +   rw_exit_read(&vm->vm_vcpu_lock);
> > +
> > +   if (vcpu == NULL) {
> > +           DPRINTF("%s: vcpu id %u of vm %u not found\n", __func__,
> > +               vmep->vmep_vcpu_id, vmep->vmep_vm_id);
> > +           return (ENOENT);
> > +   }
> > +
> > +   if (vcpu->vc_state != VCPU_STATE_STOPPED) {
> > +           DPRINTF("%s: mprotect_ept %u on vm %u attempted "
> > +               "while vcpu was in state %u (%s)\n", __func__,
> > +               vmep->vmep_vcpu_id, vmep->vmep_vm_id, vcpu->vc_state,
> > +               vcpu_state_decode(vcpu->vc_state));
> > +
> > +           return (EBUSY);
> > +   }
> > +
> > +   /* Only proceed if the pmap is in the correct mode */
> > +   KASSERT((vmm_softc->mode == VMM_MODE_EPT &&
> > +       vm->vm_map->pmap->pm_type == PMAP_TYPE_EPT) ||
> > +       (vmm_softc->mode == VMM_MODE_RVI &&
> > +        vm->vm_map->pmap->pm_type == PMAP_TYPE_RVI));
> > +
> > +   sgpa = vmep->vmep_sgpa;
> > +   size = vmep->vmep_size;
> > +   prot = vmep->vmep_prot;
> > +
> > +   /* No W^X permissions */
> > +   if ((prot & PROT_MASK) != prot &&
> > +       (prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC)) {
> > +           DPRINTF("%s: No W^X permissions\n", __func__);
> > +           return (EINVAL);
> > +   }
> 
> I would probably reword this to "W+X permission requested".
> 
> > +
> > +   /* No Write only permissions */
> > +   if ((prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) == PROT_WRITE) {
> > +           DPRINTF("%s: No Write only permissions\n", __func__);
> > +           return (EINVAL);
> > +   }
> > +
> > +   /* No empty permissions */
> > +   if (prot == 0) {
> > +           DPRINTF("%s: No empty permissions\n", __func__);
> > +           return (EINVAL);
> > +   }
> > +
> > +   /* No execute only on EPT CPUs that don't have that capability */
> > +   if (vmm_softc->mode == VMM_MODE_EPT) {
> > +           msr = rdmsr(IA32_VMX_EPT_VPID_CAP);
> > +           if (prot == PROT_EXEC &&
> > +               (msr & IA32_EPT_VPID_CAP_XO_TRANSLATIONS) == 0) {
> > +                   printf("%s: Execute only permissions unsupported,"
> > +                      " adding read permission\n", __func__);
> > +                   /* XXX should this return (EINVAL) */
> > +
> > +                   prot |= PROT_READ;
> > +           }
> > +   }
> > +
> > +   /* Must be page aligned */
> > +   if ((sgpa & PAGE_MASK) || (size & PAGE_MASK) || size == 0)
> > +           return (EINVAL);
> > +
> > +   /* size must be less than 512GB */
> > +   if (size >= NBPD_L4)
> > +           return (EINVAL);
> > +   
> > +   /* no wraparound */
> > +   if (sgpa + size < sgpa)
> > +           return (EINVAL);
> > +
> > +   /*
> > +    * Specifying addresses within the PCI MMIO space is forbidden.
> > +    * Disallow addresses that start inside the MMIO space:
> > +    * [VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END]
> > +    */
> > +   if (sgpa >= VMM_PCI_MMIO_BAR_BASE && sgpa <= VMM_PCI_MMIO_BAR_END)
> > +           return (EINVAL);
> > +
> > +   /*
> > +    * ... and disallow addresses that end inside the MMIO space:
> > +    * (VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END]
> > +    */
> > +   if (sgpa + size > VMM_PCI_MMIO_BAR_BASE &&
> > +       sgpa + size <= VMM_PCI_MMIO_BAR_END)
> > +           return (EINVAL);
> > +
> > +   memtype = vmm_get_guest_memtype(vm, sgpa);
> > +   if (memtype == VMM_MEM_TYPE_UNKNOWN)
> > +           return (EINVAL);
> > +
> > +   if (vmm_softc->mode == VMM_MODE_EPT)
> > +           ret = vmx_mprotect_ept(vm->vm_map, sgpa, sgpa + size, prot);
> > +   else if (vmm_softc->mode == VMM_MODE_RVI) {
> > +           pmap_write_protect(vm->vm_map->pmap, sgpa, sgpa + size, prot);
> > +           ret = 0;
> > +   } else
> > +           return (EINVAL);
> > +
> > +   return (ret);
> > +}
> > +
> > +/*
> > + * vmx_mprotect_ept
> > + *
> > + * apply the ept protections to the requested pages, faulting the page if
> 
> "faulting in"
> 
> > + * required.
> > + */
> > +int
> > +vmx_mprotect_ept(vm_map_t vm_map, paddr_t sgpa, paddr_t egpa, int prot)
> > +{
> > +   struct vmx_invept_descriptor vid;
> > +   pmap_t pmap;
> > +   pt_entry_t *pte;
> > +   paddr_t addr;
> > +   int ret = 0;
> > +
> > +   pmap = vm_map->pmap;
> > +
> > +   for (addr = sgpa; addr < egpa; addr += PAGE_SIZE) {
> > +           pte = vmx_pmap_find_pte_ept(pmap, addr);
> > +           if (pte == NULL) {
> 
> if (pte & PG_V) == 0
> 

After reading a reply from Adam, I think the originally suggested way is fine.
My idea of checking PG_V works for RWX EPT entries, but as soon as anyone
uses XO entries, that check would break.
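
To spell it out, here is a tiny illustration of why a PG_V test misfires on
execute-only mappings. This is not part of the diff; it only uses the existing
pt_entry_t, EPT_* and PG_V definitions and a throwaway function name.

	/* Illustrative only -- not proposed for commit. */
	static void
	xo_pte_example(void)
	{
		/*
		 * In an EPT PTE the low three bits are the R/W/X permission
		 * bits, and PG_V aliases bit 0 (the read bit), so an
		 * execute-only entry fails a PG_V presence test even though
		 * the mapping exists.
		 */
		pt_entry_t xo_pte = EPT_X;	/* XO: EPT_R and EPT_W clear */

		KASSERT((xo_pte & PG_V) == 0);	/* looks "not present" to PG_V */
		KASSERT(xo_pte != 0);		/* zero/NULL check still catches it */
	}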

-ml

> > +                   ret = uvm_fault(vm_map, addr, VM_FAULT_INVALID,
> > +                       PROT_READ | PROT_WRITE | PROT_EXEC);
> > +                   if (ret)
> > +                           printf("%s: uvm_fault returns %d, GPA=0x%llx\n",
> > +                               __func__, ret, (uint64_t)addr);
> > +
> > +                   pte = vmx_pmap_find_pte_ept(pmap, addr);
> > +                   if (pte == NULL)
> > +                           return EFAULT;
> > +           }
> > +
> > +           if (prot & PROT_READ)
> > +                   *pte |= EPT_R;
> > +           else
> > +                   *pte &= ~EPT_R;
> > +
> > +           if (prot & PROT_WRITE)
> > +                   *pte |= EPT_W;
> > +           else
> > +                   *pte &= ~EPT_W;
> > +
> > +           if (prot & PROT_EXEC)
> > +                   *pte |= EPT_X;
> > +           else
> > +                   *pte &= ~EPT_X;
> > +   }
> > +
> > +   /*
> > +    * SDM 3C: 28.3.3.4 Guidelines for Use of the INVEPT Instruction
> > +    * the first bullet point seems to say we should call invept.
> > +    *
> > +    * Software should use the INVEPT instruction with the “single-context”
> > +    * INVEPT type after making any of the following changes to an EPT
> > +    * paging-structure entry (the INVEPT descriptor should contain an
> > +    * EPTP value that references — directly or indirectly
> > +    * — the modified EPT paging structure):
> > +    * —   Changing any of the privilege bits 2:0 from 1 to 0.
> > +    */
> > +   if (pmap->eptp != 0) {
> > +           memset(&vid, 0, sizeof(vid));
> > +           vid.vid_eptp = pmap->eptp;
> > +           DPRINTF("%s: flushing EPT TLB for EPTP 0x%llx\n", __func__,
> > +               vid.vid_eptp);
> > +           invept(IA32_VMX_INVEPT_SINGLE_CTX, &vid);
> > +   }
> > +
> > +   return ret;
> > +}
> 
> The previous block is not technically correct, but it's as good as it will
> get until we get the larger "proper INVEPT" diff that I have stashed in
> one of my trees. For now, this is ok. The reason it's not correct is that it
> does not consider the case where the ioctl is being called on one CPU while
> the last CPU to run the VM might be a different one - in that case, we need
> to send an EPT flush IPI to the other cpu. Note that this problem exists in
> vmm(4) today anyway, even without this diff. Another possible fix for the
> general problem would be to flush the EPT TLB on each vmentry, but that
> would impact performance in a negative way.
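> 
> To make the per-vmentry flush alternative concrete, a rough sketch that
> reuses the descriptor the diff already builds in vmx_mprotect_ept(). This
> is illustrative only, not something I'm proposing to commit, and as said
> it would cost performance:
> 
> 	/*
> 	 * Hypothetical helper: flush this VM's EPT translations before
> 	 * every vmentry.
> 	 */
> 	static void
> 	vmx_flush_ept(struct vcpu *vcpu)
> 	{
> 		struct vmx_invept_descriptor vid;
> 
> 		memset(&vid, 0, sizeof(vid));
> 		vid.vid_eptp = vcpu->vc_parent->vm_map->pmap->eptp;
> 		invept(IA32_VMX_INVEPT_SINGLE_CTX, &vid);
> 	}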
> 
> > +
> > +/*
> > + * vmx_pmap_find_pte_ept
> > + *
> > + * find the page table entry specified by addr in the pmap supplied.
> > + */
> > +pt_entry_t *
> > +vmx_pmap_find_pte_ept(pmap_t pmap, paddr_t addr)
> > +{
> > +   int l4idx, l3idx, l2idx, l1idx;
> > +   pd_entry_t *pd;
> > +   paddr_t pdppa;
> > +   pt_entry_t *ptes, *pte;
> > +
> > +   l4idx = (addr & L4_MASK) >> L4_SHIFT; /* PML4E idx */
> > +   l3idx = (addr & L3_MASK) >> L3_SHIFT; /* PDPTE idx */
> > +   l2idx = (addr & L2_MASK) >> L2_SHIFT; /* PDE idx */
> > +   l1idx = (addr & L1_MASK) >> L1_SHIFT; /* PTE idx */
> > +
> > +   pd = (pd_entry_t *)pmap->pm_pdir;
> > +   if (pd == NULL)
> > +           return NULL;
> > +
> > +   /*
> > +    * l4idx should always be 0 since we don't support more than 512GB
> > +    * guest physical memory.
> > +    */
> > +   if (l4idx > 0)
> > +           return NULL;
> > +
> > +   /*
> > +    * l3idx should always be < MAXDSIZ/1GB because we don't support more
> > +    * than MAXDSIZ guest phys mem.
> > +    */
> > +   if (l3idx >= MAXDSIZ / ((paddr_t)1024*1024*1024))
> 
> Spaces around *s
> 
> > +           return NULL;
> > +
> > +   pdppa = pd[l4idx] & PG_FRAME;
> > +   if (pdppa == 0)
> > +           return NULL;
> > +
> > +   ptes = (pt_entry_t *)PMAP_DIRECT_MAP(pdppa);
> > +
> > +   pdppa = ptes[l3idx] & PG_FRAME;
> > +   if (pdppa == 0)
> > +           return NULL;
> > +
> > +   ptes = (pt_entry_t *)PMAP_DIRECT_MAP(pdppa);
> > +
> > +   pdppa = ptes[l2idx] & PG_FRAME;
> > +   if (pdppa == 0)
> > +           return NULL;
> > +
> > +   ptes = (pt_entry_t *)PMAP_DIRECT_MAP(pdppa);
> > +
> > +   pte = &ptes[l1idx];
> > +   if (*pte == 0)
> > +           return NULL;
> > +
> > +   return pte;
> > +}
> > +
> > +/*
> >   * vm_find
> >   *
> >   * Function to find an existing VM by its identifier.
> > @@ -5099,19 +5388,35 @@ vmx_get_exit_qualification(uint64_t *exi
> >  int
> >  vmx_get_guest_faulttype(void)
> >  {
> > -   uint64_t exit_qualification;
> > +   uint64_t exit_qual;
> >     uint64_t presentmask = IA32_VMX_EPT_FAULT_WAS_READABLE |
> >         IA32_VMX_EPT_FAULT_WAS_WRITABLE | IA32_VMX_EPT_FAULT_WAS_EXECABLE;
> > -   uint64_t protmask = IA32_VMX_EPT_FAULT_READ |
> > -       IA32_VMX_EPT_FAULT_WRITE | IA32_VMX_EPT_FAULT_EXEC;
> > +   vm_prot_t prot, was_prot;
> >  
> > -   if (vmx_get_exit_qualification(&exit_qualification))
> > +   if (vmx_get_exit_qualification(&exit_qual))
> >             return (-1);
> >  
> > -   if ((exit_qualification & presentmask) == 0)
> > +   if ((exit_qual & presentmask) == 0)
> >             return VM_FAULT_INVALID;
> > -   if (exit_qualification & protmask)
> > +
> > +   was_prot = 0;
> > +   if (exit_qual & IA32_VMX_EPT_FAULT_WAS_READABLE)
> > +           was_prot |= PROT_READ;
> > +   if (exit_qual & IA32_VMX_EPT_FAULT_WAS_WRITABLE)
> > +           was_prot |= PROT_WRITE;
> > +   if (exit_qual & IA32_VMX_EPT_FAULT_WAS_EXECABLE)
> > +           was_prot |= PROT_EXEC;
> > +
> > +   if (exit_qual & IA32_VMX_EPT_FAULT_READ)
> > +           prot = PROT_READ;
> > +   else if (exit_qual & IA32_VMX_EPT_FAULT_WRITE)
> > +           prot = PROT_WRITE;
> > +   else if (exit_qual & IA32_VMX_EPT_FAULT_EXEC)
> > +           prot = PROT_EXEC;
> > +
> > +   if ((was_prot & prot) == 0)
> >             return VM_FAULT_PROTECT;
> > +
> >     return (-1);
> >  }
> >  
> > @@ -5188,6 +5493,9 @@ svm_handle_np_fault(struct vcpu *vcpu)
> >   *
> >   * Request a new page to be faulted into the UVM map of the VM owning 'vcpu'
> >   * at address 'gpa'.
> > + *
> > + * Returns EAGAIN to indicate a protection fault, i.e. writing to a read-only
> > + * page.
> >   */
> >  int
> >  vmx_fault_page(struct vcpu *vcpu, paddr_t gpa)
> > @@ -5198,6 +5506,11 @@ vmx_fault_page(struct vcpu *vcpu, paddr_
> >     if (fault_type == -1) {
> >             printf("%s: invalid fault type\n", __func__);
> >             return (EINVAL);
> > +   }
> > +
> > +   if (fault_type == VM_FAULT_PROTECT) {
> > +           vcpu->vc_exit.vee.vee_fault_type = VEE_FAULT_PROTECT;
> > +           return (EAGAIN);
> >     }
> >  
> >     ret = uvm_fault(vcpu->vc_parent->vm_map, gpa, fault_type,
> > Index: sys/arch/amd64/include/specialreg.h
> > ===================================================================
> > RCS file: /cvs/src/sys/arch/amd64/include/specialreg.h,v
> > retrieving revision 1.86
> > diff -u -p -u -p -r1.86 specialreg.h
> > --- sys/arch/amd64/include/specialreg.h     20 Dec 2019 07:49:31 -0000      1.86
> > +++ sys/arch/amd64/include/specialreg.h     7 Feb 2020 03:15:17 -0000
> > @@ -954,9 +954,10 @@
> >  #define IA32_VMX_TRUE_ENTRY_CTLS   0x490
> >  #define IA32_VMX_VMFUNC                    0x491
> >  
> > -#define IA32_EPT_VPID_CAP_PAGE_WALK_4      (1ULL << 6)
> > -#define IA32_EPT_VPID_CAP_WB               (1ULL << 14)
> > -#define IA32_EPT_VPID_CAP_AD_BITS  (1ULL << 21)
> > +#define IA32_EPT_VPID_CAP_XO_TRANSLATIONS  (1ULL << 0)
> > +#define IA32_EPT_VPID_CAP_PAGE_WALK_4              (1ULL << 6)
> > +#define IA32_EPT_VPID_CAP_WB                       (1ULL << 14)
> > +#define IA32_EPT_VPID_CAP_AD_BITS          (1ULL << 21)
> >  
> >  #define IA32_EPT_PAGING_CACHE_TYPE_UC      0x0
> >  #define IA32_EPT_PAGING_CACHE_TYPE_WB      0x6
> > Index: sys/arch/amd64/include/vmmvar.h
> > ===================================================================
> > RCS file: /cvs/src/sys/arch/amd64/include/vmmvar.h,v
> > retrieving revision 1.68
> > diff -u -p -u -p -r1.68 vmmvar.h
> > --- sys/arch/amd64/include/vmmvar.h 22 Jan 2020 03:29:58 -0000      1.68
> > +++ sys/arch/amd64/include/vmmvar.h 7 Feb 2020 03:15:17 -0000
> > @@ -322,6 +322,10 @@ enum {
> >  };
> >  
> >  enum {
> > +   VEE_FAULT_PROTECT
> > +};
> > +
> > +enum {
> >     VMM_CPU_MODE_REAL,
> >     VMM_CPU_MODE_PROT,
> >     VMM_CPU_MODE_PROT32,
> > @@ -350,6 +354,12 @@ struct vm_exit_inout {
> >     uint16_t                vei_port;       /* port */
> >     uint32_t                vei_data;       /* data */
> >  };
> > +/*
> > + *  vm_exit_eptviolation   : describes an EPT VIOLATION exit
> > + */
> > +struct vm_exit_eptviolation {
> > +   uint8_t         vee_fault_type;
> > +};
> >  
> >  /*
> >   * struct vcpu_segment_info
> > @@ -447,7 +457,8 @@ struct vm_mem_range {
> >   */
> >  struct vm_exit {
> >     union {
> > -           struct vm_exit_inout    vei;            /* IN/OUT exit */
> > +           struct vm_exit_inout            vei;    /* IN/OUT exit */
> > +           struct vm_exit_eptviolation     vee;    /* EPT VIOLATION exit */
> >     };
> >  
> >     struct vcpu_reg_state           vrs;
> > @@ -558,6 +569,15 @@ struct vm_rwregs_params {
> >     struct vcpu_reg_state   vrwp_regs;
> >  };
> >  
> > +struct vm_mprotect_ept_params {
> > +   /* Input parameters to VMM_IOC_MPROTECT_EPT */
> > +   uint32_t                vmep_vm_id;
> > +   uint32_t                vmep_vcpu_id;
> > +   vaddr_t                 vmep_sgpa;
> > +   size_t                  vmep_size;
> > +   int                     vmep_prot;
> > +};
> > +
> >  /* IOCTL definitions */
> >  #define VMM_IOC_CREATE _IOWR('V', 1, struct vm_create_params) /* Create VM */
> >  #define VMM_IOC_RUN _IOWR('V', 2, struct vm_run_params) /* Run VCPU */
> > @@ -571,7 +591,8 @@ struct vm_rwregs_params {
> >  #define VMM_IOC_READVMPARAMS _IOWR('V', 9, struct vm_rwvmparams_params)
> >  /* Set VM params */
> >  #define VMM_IOC_WRITEVMPARAMS _IOW('V', 10, struct vm_rwvmparams_params)
> > -
> > +/* Control the protection of ept pages */
> > +#define VMM_IOC_MPROTECT_EPT _IOW('V', 11, struct vm_mprotect_ept_params)
> >  
> >  /* CPUID masks */
> >  /*
> > Index: usr.sbin/vmd/vm.c
> > ===================================================================
> > RCS file: /cvs/src/usr.sbin/vmd/vm.c,v
> > retrieving revision 1.54
> > diff -u -p -u -p -r1.54 vm.c
> > --- usr.sbin/vmd/vm.c       11 Dec 2019 06:45:16 -0000      1.54
> > +++ usr.sbin/vmd/vm.c       7 Feb 2020 03:15:23 -0000
> > @@ -81,6 +81,7 @@ void init_emulated_hw(struct vmop_create
> >  void restore_emulated_hw(struct vm_create_params *, int, int *,
> >      int[][VM_MAX_BASE_PER_DISK],int);
> >  void vcpu_exit_inout(struct vm_run_params *);
> > +int vcpu_exit_eptviolation(struct vm_run_params *);
> >  uint8_t vcpu_exit_pci(struct vm_run_params *);
> >  int vcpu_pic_intr(uint32_t, uint32_t, uint8_t);
> >  int loadfile_bios(FILE *, struct vcpu_reg_state *);
> > @@ -1600,6 +1601,38 @@ vcpu_exit_inout(struct vm_run_params *vr
> >  }
> >  
> >  /*
> > + * vcpu_exit_eptviolation
> > + *
> > + * handle an EPT Violation
> > + *
> > + *
> > + * Parameters:
> > + *  vrp: vcpu run parameters containing guest state for this exit
> > + *
> > + * Return values:
> > + *  0: no action required
> > + *  EAGAIN: a protection fault occurred, kill the vm.
> > + */
> > +int
> > +vcpu_exit_eptviolation(struct vm_run_params *vrp)
> > +{
> > +   struct vm_exit *ve = vrp->vrp_exit;
> > +   /*
> > +    * vmd may be exiting to vmd to handle a pending interrupt
> > +    * vmm(4) may be exiting to vmd to handle a pending interrupt,
> > +    * but the last exit type may have been VMX_EXIT_EPT_VIOLATION;
> > +    * a VMX_EXIT_EPT_VIOLATION.
> > +    */
> > +   if (ve->vee.vee_fault_type == VEE_FAULT_PROTECT) {
> > +           log_debug("%s: EPT Violation: rip=0x%llx",
> > +               __progname, vrp->vrp_exit->vrs.vrs_gprs[VCPU_REGS_RIP]);
> > +           return (EAGAIN);
> > +   }
> > +
> > +   return (0);
> > +}
> > +
> > +/*
> >   * vcpu_exit
> >   *
> >   * Handle a vcpu exit. This function is called when it is determined that
> > @@ -1629,7 +1662,6 @@ vcpu_exit(struct vm_run_params *vrp)
> >     case VMX_EXIT_CPUID:
> >     case VMX_EXIT_EXTINT:
> >     case SVM_VMEXIT_INTR:
> > -   case VMX_EXIT_EPT_VIOLATION:
> >     case SVM_VMEXIT_NPF:
> >     case SVM_VMEXIT_MSR:
> >     case SVM_VMEXIT_CPUID:
> > @@ -1640,6 +1672,12 @@ vcpu_exit(struct vm_run_params *vrp)
> >              * here (and falling through to the default case below results
> >              * in more vmd log spam).
> >              */
> > +           break;
> > +   case VMX_EXIT_EPT_VIOLATION:
> > +           ret = vcpu_exit_eptviolation(vrp);
> > +           if (ret)
> > +                   return (ret);
> > +
> >             break;
> >     case VMX_EXIT_IO:
> >     case SVM_VMEXIT_IOIO:
> > 
