Re: [PATCH v3 2/4] KVM: SVM: Add emulation support for #GP triggered by SVM instructions

2021-01-26 Thread Maxim Levitsky
On Tue, 2021-01-26 at 03:18 -0500, Wei Huang wrote:
> From: Bandan Das 
> 
> While running SVM related instructions (VMRUN/VMSAVE/VMLOAD), some AMD
> CPUs check EAX against reserved memory regions (e.g. SMM memory on host)
> before checking VMCB's instruction intercept. If EAX falls into such
> memory areas, #GP is triggered before VMEXIT. This causes problems under
> nested virtualization. To solve this problem, KVM needs to trap #GP and
> check the instruction that triggered #GP. For SVM execution instructions,
> KVM emulates them.
> 
> Co-developed-by: Wei Huang 
> Signed-off-by: Wei Huang 
> Signed-off-by: Bandan Das 
> ---
>  arch/x86/kvm/svm/svm.c | 109 ++---
>  1 file changed, 91 insertions(+), 18 deletions(-)
> 
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 7ef171790d02..e5ca01e25e89 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -200,6 +200,8 @@ module_param(sev_es, int, 0444);
>  bool __read_mostly dump_invalid_vmcb;
>  module_param(dump_invalid_vmcb, bool, 0644);
>  
> +bool svm_gp_erratum_intercept = true;
I'd expect this to be a module param, so that the user
could override it, just like enable_vmware_backdoor.
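Something like this, next to the other module params (untested sketch;
the 0444 read-only permission is just a guess):

bool __read_mostly svm_gp_erratum_intercept = true;
module_param(svm_gp_erratum_intercept, bool, 0444);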

> +
>  static u8 rsm_ins_bytes[] = "\x0f\xaa";
>  
>  static void svm_complete_interrupts(struct vcpu_svm *svm);
> @@ -288,6 +290,9 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
>   if (!(efer & EFER_SVME)) {
>   svm_leave_nested(svm);
>   svm_set_gif(svm, true);
> + /* #GP intercept is still needed in vmware_backdoor */
> + if (!enable_vmware_backdoor)
I would use if (svm_gp_erratum_intercept && !enable_vmware_backdoor) here,
to document this dependency.
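i.e. something like this (untested sketch):

	/* The erratum workaround is the only other user of the #GP
	 * intercept; vmware_backdoor still needs it, so only clear it
	 * when the workaround was the one that set it. */
	if (svm_gp_erratum_intercept && !enable_vmware_backdoor)
		clr_exception_intercept(svm, GP_VECTOR);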

> + clr_exception_intercept(svm, GP_VECTOR);
>  
>   /*
>* Free the nested guest state, unless we are in SMM.
> @@ -309,6 +314,10 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
>  
>   svm->vmcb->save.efer = efer | EFER_SVME;
>   vmcb_mark_dirty(svm->vmcb, VMCB_CR);
> + /* Enable #GP interception for SVM instructions */
> + if (svm_gp_erratum_intercept)
> + set_exception_intercept(svm, GP_VECTOR);
> +
>   return 0;
>  }
>  
> @@ -1957,24 +1966,6 @@ static int ac_interception(struct vcpu_svm *svm)
>   return 1;
>  }
>  
> -static int gp_interception(struct vcpu_svm *svm)
> -{
> - struct kvm_vcpu *vcpu = &svm->vcpu;
> - u32 error_code = svm->vmcb->control.exit_info_1;
> -
> - WARN_ON_ONCE(!enable_vmware_backdoor);  
> -
> - /*
> -  * VMware backdoor emulation on #GP interception only handles IN{S},
> -  * OUT{S}, and RDPMC, none of which generate a non-zero error code.
> -  */
> - if (error_code) {
> - kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
> - return 1;
> - }
> - return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP);
> -}
> -
>  static bool is_erratum_383(void)
>  {
>   int err, i;
> @@ -2173,6 +2164,88 @@ static int vmrun_interception(struct vcpu_svm *svm)
>   return nested_svm_vmrun(svm);
>  }
>  
> +enum {
> + NONE_SVM_INSTR,
> + SVM_INSTR_VMRUN,
> + SVM_INSTR_VMLOAD,
> + SVM_INSTR_VMSAVE,
> +};
> +
> +/* Return NONE_SVM_INSTR if not SVM instrs, otherwise return decode result */
> +static int svm_instr_opcode(struct kvm_vcpu *vcpu)
> +{
> + struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
> +
> + if (ctxt->b != 0x1 || ctxt->opcode_len != 2)
> + return NONE_SVM_INSTR;
> +
> + switch (ctxt->modrm) {
> + case 0xd8: /* VMRUN */
> + return SVM_INSTR_VMRUN;
> + case 0xda: /* VMLOAD */
> + return SVM_INSTR_VMLOAD;
> + case 0xdb: /* VMSAVE */
> + return SVM_INSTR_VMSAVE;
> + default:
> + break;
> + }
> +
> + return NONE_SVM_INSTR;
> +}
> +
> +static int emulate_svm_instr(struct kvm_vcpu *vcpu, int opcode)
> +{
> + int (*const svm_instr_handlers[])(struct vcpu_svm *svm) = {
> + [SVM_INSTR_VMRUN] = vmrun_interception,
> + [SVM_INSTR_VMLOAD] = vmload_interception,
> + [SVM_INSTR_VMSAVE] = vmsave_interception,
> + };
> + struct vcpu_svm *svm = to_svm(vcpu);
> +
> + return svm_instr_handlers[opcode](svm);
> +}
> +
> +/*
> + * #GP handling code. Note that #GP can be triggered in the following two
> + * cases:
> + *   1) SVM VM-related instructions (VMRUN/VMSAVE/VMLOAD) that trigger #GP on
> + *  some AMD CPUs when EAX of these instructions is in a reserved memory
> + *  region (e.g. SMM memory on host).
> + *   2) VMware backdoor
> + */
> +static int gp_interception(struct vcpu_svm *svm)
> +{
> + struct kvm_vcpu *vcpu = &svm->vcpu;
> + u32 error_code = svm->vmcb->control.exit_info_1;
> + int opcode;
> +
> + /* Both #GP cases have zero error_code */
> + if (error_code)
> + goto reinject;

Re: [PATCH v3 2/4] KVM: SVM: Add emulation support for #GP triggered by SVM instructions

2021-01-26 Thread Paolo Bonzini

On 26/01/21 09:18, Wei Huang wrote:


@@ -288,6 +290,9 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
if (!(efer & EFER_SVME)) {
svm_leave_nested(svm);
svm_set_gif(svm, true);
+   /* #GP intercept is still needed in vmware_backdoor */
+   if (!enable_vmware_backdoor)
+   clr_exception_intercept(svm, GP_VECTOR);
 
		/*
		 * Free the nested guest state, unless we are in SMM.
@@ -309,6 +314,10 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
 
	svm->vmcb->save.efer = efer | EFER_SVME;
	vmcb_mark_dirty(svm->vmcb, VMCB_CR);
+   /* Enable #GP interception for SVM instructions */
+   if (svm_gp_erratum_intercept)
+   set_exception_intercept(svm, GP_VECTOR);
+
return 0;
 }
 


This should be in the "else" branch of the "if (!(efer & EFER_SVME))"
check.  I'll fix it up myself.
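
Roughly (untested sketch of the fixed-up placement, unrelated lines elided):

	if (!(efer & EFER_SVME)) {
		svm_leave_nested(svm);
		svm_set_gif(svm, true);
		/* #GP intercept is still needed for vmware_backdoor */
		if (!enable_vmware_backdoor)
			clr_exception_intercept(svm, GP_VECTOR);
		...
	} else {
		/* Intercept #GP for the erratum only while the guest
		 * can execute SVM instructions (EFER.SVME set). */
		if (svm_gp_erratum_intercept)
			set_exception_intercept(svm, GP_VECTOR);
	}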


Paolo



[PATCH v3 2/4] KVM: SVM: Add emulation support for #GP triggered by SVM instructions

2021-01-26 Thread Wei Huang
From: Bandan Das 

While running SVM related instructions (VMRUN/VMSAVE/VMLOAD), some AMD
CPUs check EAX against reserved memory regions (e.g. SMM memory on host)
before checking VMCB's instruction intercept. If EAX falls into such
memory areas, #GP is triggered before VMEXIT. This causes problems under
nested virtualization. To solve this problem, KVM needs to trap #GP and
check the instruction that triggered #GP. For SVM execution instructions,
KVM emulates them.

Co-developed-by: Wei Huang 
Signed-off-by: Wei Huang 
Signed-off-by: Bandan Das 
---
 arch/x86/kvm/svm/svm.c | 109 ++---
 1 file changed, 91 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 7ef171790d02..e5ca01e25e89 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -200,6 +200,8 @@ module_param(sev_es, int, 0444);
 bool __read_mostly dump_invalid_vmcb;
 module_param(dump_invalid_vmcb, bool, 0644);
 
+bool svm_gp_erratum_intercept = true;
+
 static u8 rsm_ins_bytes[] = "\x0f\xaa";
 
 static void svm_complete_interrupts(struct vcpu_svm *svm);
@@ -288,6 +290,9 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
if (!(efer & EFER_SVME)) {
svm_leave_nested(svm);
svm_set_gif(svm, true);
+   /* #GP intercept is still needed in vmware_backdoor */
+   if (!enable_vmware_backdoor)
+   clr_exception_intercept(svm, GP_VECTOR);
 
/*
 * Free the nested guest state, unless we are in SMM.
@@ -309,6 +314,10 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
 
svm->vmcb->save.efer = efer | EFER_SVME;
vmcb_mark_dirty(svm->vmcb, VMCB_CR);
+   /* Enable #GP interception for SVM instructions */
+   if (svm_gp_erratum_intercept)
+   set_exception_intercept(svm, GP_VECTOR);
+
return 0;
 }
 
@@ -1957,24 +1966,6 @@ static int ac_interception(struct vcpu_svm *svm)
return 1;
 }
 
-static int gp_interception(struct vcpu_svm *svm)
-{
-   struct kvm_vcpu *vcpu = &svm->vcpu;
-   u32 error_code = svm->vmcb->control.exit_info_1;
-
-   WARN_ON_ONCE(!enable_vmware_backdoor);
-
-   /*
-* VMware backdoor emulation on #GP interception only handles IN{S},
-* OUT{S}, and RDPMC, none of which generate a non-zero error code.
-*/
-   if (error_code) {
-   kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
-   return 1;
-   }
-   return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP);
-}
-
 static bool is_erratum_383(void)
 {
int err, i;
@@ -2173,6 +2164,88 @@ static int vmrun_interception(struct vcpu_svm *svm)
return nested_svm_vmrun(svm);
 }
 
+enum {
+   NONE_SVM_INSTR,
+   SVM_INSTR_VMRUN,
+   SVM_INSTR_VMLOAD,
+   SVM_INSTR_VMSAVE,
+};
+
+/* Return NONE_SVM_INSTR if not SVM instrs, otherwise return decode result */
+static int svm_instr_opcode(struct kvm_vcpu *vcpu)
+{
+   struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
+
+   if (ctxt->b != 0x1 || ctxt->opcode_len != 2)
+   return NONE_SVM_INSTR;
+
+   switch (ctxt->modrm) {
+   case 0xd8: /* VMRUN */
+   return SVM_INSTR_VMRUN;
+   case 0xda: /* VMLOAD */
+   return SVM_INSTR_VMLOAD;
+   case 0xdb: /* VMSAVE */
+   return SVM_INSTR_VMSAVE;
+   default:
+   break;
+   }
+
+   return NONE_SVM_INSTR;
+}
+
+static int emulate_svm_instr(struct kvm_vcpu *vcpu, int opcode)
+{
+   int (*const svm_instr_handlers[])(struct vcpu_svm *svm) = {
+   [SVM_INSTR_VMRUN] = vmrun_interception,
+   [SVM_INSTR_VMLOAD] = vmload_interception,
+   [SVM_INSTR_VMSAVE] = vmsave_interception,
+   };
+   struct vcpu_svm *svm = to_svm(vcpu);
+
+   return svm_instr_handlers[opcode](svm);
+}
+
+/*
+ * #GP handling code. Note that #GP can be triggered in the following two
+ * cases:
+ *   1) SVM VM-related instructions (VMRUN/VMSAVE/VMLOAD) that trigger #GP on
+ *  some AMD CPUs when EAX of these instructions is in a reserved memory
+ *  region (e.g. SMM memory on host).
+ *   2) VMware backdoor
+ */
+static int gp_interception(struct vcpu_svm *svm)
+{
+   struct kvm_vcpu *vcpu = &svm->vcpu;
+   u32 error_code = svm->vmcb->control.exit_info_1;
+   int opcode;
+
+   /* Both #GP cases have zero error_code */
+   if (error_code)
+   goto reinject;
+
+   /* Decode the instruction for usage later */
+   if (x86_decode_emulated_instruction(vcpu, 0, NULL, 0) != EMULATION_OK)
+   goto reinject;
+
+   opcode = svm_instr_opcode(vcpu);
+
+   if (opcode == NONE_SVM_INSTR) {
+   WARN_ON_ONCE(!enable_vmware_backdoor);
+
+   /*
+* VMware backdoor emulation on #GP