Re: [PATCH v3] KVM/x86: Move definition of __ex to x86.h

2020-12-21 Thread Krish Sadhukhan



On 12/21/20 11:48 AM, Uros Bizjak wrote:

Merge __kvm_handle_fault_on_reboot with its sole user
and move the definition of __ex to a common include to be
shared between VMX and SVM.

v2: Rebase to the latest kvm/queue.

v3: Incorporate changes from review comments.

Cc: Paolo Bonzini 
Cc: Sean Christopherson 
Signed-off-by: Uros Bizjak 
Reviewed-by: Krish Sadhukhan 
---
  arch/x86/include/asm/kvm_host.h | 25 -
  arch/x86/kvm/svm/sev.c  |  2 --
  arch/x86/kvm/svm/svm.c  |  2 --
  arch/x86/kvm/vmx/vmx.c  |  4 +---
  arch/x86/kvm/vmx/vmx_ops.h  |  4 +---
  arch/x86/kvm/x86.h  | 24 
  6 files changed, 26 insertions(+), 35 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 39707e72b062..a78e4b1a5d77 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1634,31 +1634,6 @@ enum {
  #define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 
1 : 0)
  #define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
  
-asmlinkage void kvm_spurious_fault(void);

-
-/*
- * Hardware virtualization extension instructions may fault if a
- * reboot turns off virtualization while processes are running.
- * Usually after catching the fault we just panic; during reboot
- * instead the instruction is ignored.
- */
-#define __kvm_handle_fault_on_reboot(insn) \
-   "666: \n\t"   \
-   insn "\n\t"   \
-   "jmp   668f \n\t" \
-   "667: \n\t"   \
-   "1: \n\t" \
-   ".pushsection .discard.instr_begin \n\t"  \
-   ".long 1b - . \n\t"   \
-   ".popsection \n\t"\
-   "call  kvm_spurious_fault \n\t"   \
-   "1: \n\t" \
-   ".pushsection .discard.instr_end \n\t"\
-   ".long 1b - . \n\t"   \
-   ".popsection \n\t"\
-   "668: \n\t"   \
-   _ASM_EXTABLE(666b, 667b)
-
  #define KVM_ARCH_WANT_MMU_NOTIFIER
  int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long 
end,
unsigned flags);
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index e57847ff8bd2..ba492b6d37a0 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -25,8 +25,6 @@
  #include "cpuid.h"
  #include "trace.h"
  
-#define __ex(x) __kvm_handle_fault_on_reboot(x)

-
  static u8 sev_enc_bit;
  static int sev_flush_asids(void);
  static DECLARE_RWSEM(sev_deactivate_lock);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 941e5251e13f..733d9f98a121 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -42,8 +42,6 @@
  
  #include "svm.h"
  
-#define __ex(x) __kvm_handle_fault_on_reboot(x)

-
  MODULE_AUTHOR("Qumranet");
  MODULE_LICENSE("GPL");
  
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c

index 75c9c6a0a3a4..b82f2689f2d7 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2320,9 +2320,7 @@ static void vmclear_local_loaded_vmcss(void)
  }
  
  
-/* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot()

- * tricks.
- */
+/* Just like cpu_vmxoff(), but with the fault handling. */
  static void kvm_cpu_vmxoff(void)
  {
asm volatile (__ex("vmxoff"));
diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h
index 692b0c31c9c8..7e3cb53c413f 100644
--- a/arch/x86/kvm/vmx/vmx_ops.h
+++ b/arch/x86/kvm/vmx/vmx_ops.h
@@ -4,13 +4,11 @@
  
  #include <linux/nospec.h>
  
-#include <asm/kvm_host.h>

  #include <asm/vmx.h>
  
  #include "evmcs.h"

  #include "vmcs.h"
-
-#define __ex(x) __kvm_handle_fault_on_reboot(x)
+#include "x86.h"
  
  asmlinkage void vmread_error(unsigned long field, bool fault);

  __attribute__((regparm(0))) void vmread_error_trampoline(unsigned long field,
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index c5ee0f5ce0f1..5b16d2b5c3bc 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -8,6 +8,30 @@
  #include "kvm_cache_regs.h"
  #include "kvm_emulate.h"
  
+asmlinkage void kvm_spurious_fault(void);

+
+/*
+ * Handle a fault on a hardware virtualization (VMX or SVM) instruction.
+ *
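[The remainder of the v3 x86.h hunk is cut off in this archive; the v1 hunk quoted in the next message shows the full macro body. For orientation, a minimal usage sketch of the shared __ex() wrapper, modeled on the kvm_cpu_vmxoff() call site above; the helper name here is illustrative only:]

static inline void example_vmxoff(void)
{
	/*
	 * __ex() wraps the instruction with an exception-table fixup, so a
	 * fault (e.g. VMXOFF issued after a reboot has already disabled VMX)
	 * is routed to kvm_spurious_fault() instead of an unhandled fault.
	 */
	asm volatile(__ex("vmxoff"));
}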

Re: [PATCH] KVM/x86: Move definition of __ex to x86.h

2020-12-18 Thread Krish Sadhukhan



On 12/18/20 4:11 AM, Uros Bizjak wrote:

Merge __kvm_handle_fault_on_reboot with its sole user
and move the definition of __ex to a common include to be
shared between VMX and SVM.

Cc: Paolo Bonzini 
Cc: Sean Christopherson 
Signed-off-by: Uros Bizjak 
---
  arch/x86/include/asm/kvm_host.h | 25 -
  arch/x86/kvm/svm/svm.c  |  2 --
  arch/x86/kvm/vmx/vmx_ops.h  |  4 +---
  arch/x86/kvm/x86.h  | 23 +++
  4 files changed, 24 insertions(+), 30 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7e5f33a0d0e2..ff152ee1d63f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1623,31 +1623,6 @@ enum {
  #define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 
1 : 0)
  #define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
  
-asmlinkage void kvm_spurious_fault(void);

-
-/*
- * Hardware virtualization extension instructions may fault if a
- * reboot turns off virtualization while processes are running.
- * Usually after catching the fault we just panic; during reboot
- * instead the instruction is ignored.
- */
-#define __kvm_handle_fault_on_reboot(insn) \
-   "666: \n\t"   \
-   insn "\n\t"   \
-   "jmp   668f \n\t" \
-   "667: \n\t"   \
-   "1: \n\t" \
-   ".pushsection .discard.instr_begin \n\t"  \
-   ".long 1b - . \n\t"   \
-   ".popsection \n\t"\
-   "call  kvm_spurious_fault \n\t"   \
-   "1: \n\t" \
-   ".pushsection .discard.instr_end \n\t"\
-   ".long 1b - . \n\t"   \
-   ".popsection \n\t"\
-   "668: \n\t"   \
-   _ASM_EXTABLE(666b, 667b)
-
  #define KVM_ARCH_WANT_MMU_NOTIFIER
  int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long 
end,
unsigned flags);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index da7eb4aaf44f..0a72ab9fd568 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -42,8 +42,6 @@
  
  #include "svm.h"
  
-#define __ex(x) __kvm_handle_fault_on_reboot(x)

-
  MODULE_AUTHOR("Qumranet");
  MODULE_LICENSE("GPL");
  
diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h

index 692b0c31c9c8..7e3cb53c413f 100644
--- a/arch/x86/kvm/vmx/vmx_ops.h
+++ b/arch/x86/kvm/vmx/vmx_ops.h
@@ -4,13 +4,11 @@
  
  #include <linux/nospec.h>
  
-#include <asm/kvm_host.h>

  #include <asm/vmx.h>
  
  #include "evmcs.h"

  #include "vmcs.h"
-
-#define __ex(x) __kvm_handle_fault_on_reboot(x)
+#include "x86.h"
  
  asmlinkage void vmread_error(unsigned long field, bool fault);

  __attribute__((regparm(0))) void vmread_error_trampoline(unsigned long field,
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index e7ca622a468f..608548d05e84 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -7,6 +7,29 @@
  #include "kvm_cache_regs.h"
  #include "kvm_emulate.h"
  
+asmlinkage void kvm_spurious_fault(void);

+
+/*
+ * Hardware virtualization extension instructions may fault if a
+ * reboot turns off virtualization while processes are running.
+ * Usually after catching the fault we just panic; during reboot
+ * instead the instruction is ignored.
+ */
+#define __ex(insn) \
+   "666:  " insn "\n"  \
+   "  jmp 669f\n"\
+   "667:\n"  \
+   ".pushsection .discard.instr_begin\n" \
+   ".long 667b - .\n"\
+   ".popsection\n"   \
+   "  call kvm_spurious_fault\n" \
+   "668:\n"  \
+   ".pushsection .discard.instr_end\n"   \
+   ".long 668b - .\n"    \
+   ".popsection\n"   \
+   "669:\n"  \
+   _ASM_EXTABLE(666b, 667b)
+
  #define KVM_DEFAULT_PLE_GAP   128
  #define KVM_VMX_DEFAULT_PLE_WINDOW4096
  #define KVM_DEFAULT_PLE_WINDOW_GROW   2

Reviewed-by: Krish Sadhukhan 
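[A hedged, commented reading of the __ex() expansion above, not part of the patch:]

/*
 * 666:  <insn>                  the faultable VMX/SVM instruction
 *       jmp   669f              no fault: skip the error path
 * 667:                          fixup target; _ASM_EXTABLE(666b, 667b)
 *                               resumes here if 666 faults
 *       call  kvm_spurious_fault
 * 669:                          normal continuation
 *
 * The .discard.instr_begin/.discard.instr_end records appear to bracket the
 * error path for objtool's noinstr validation and are discarded at link time.
 */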


[tip: x86/cpu] KVM: SVM: Don't flush cache if hardware enforces cache coherency across encryption domains

2020-09-19 Thread tip-bot2 for Krish Sadhukhan
The following commit has been merged into the x86/cpu branch of tip:

Commit-ID: e1ebb2b49048c4767cfa0d8466f9c701e549fa5e
Gitweb:
https://git.kernel.org/tip/e1ebb2b49048c4767cfa0d8466f9c701e549fa5e
Author:Krish Sadhukhan 
AuthorDate:Thu, 17 Sep 2020 21:20:38 
Committer: Borislav Petkov 
CommitterDate: Sat, 19 Sep 2020 20:46:59 +02:00

KVM: SVM: Don't flush cache if hardware enforces cache coherency across 
encryption domains

In some hardware implementations, coherency between the encrypted and
unencrypted mappings of the same physical page in a VM is enforced. In
such a system, it is not required for software to flush the VM's page
from all CPU caches in the system prior to changing the value of the
C-bit for the page.

So check that bit before flushing the cache.

Signed-off-by: Krish Sadhukhan 
Signed-off-by: Borislav Petkov 
Acked-by: Paolo Bonzini 
Link: https://lkml.kernel.org/r/20200917212038.5090-4-krish.sadhuk...@oracle.com
---
 arch/x86/kvm/svm/sev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 402dc42..567792f 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -384,7 +384,8 @@ static void sev_clflush_pages(struct page *pages[], 
unsigned long npages)
uint8_t *page_virtual;
unsigned long i;
 
-   if (npages == 0 || pages == NULL)
+   if (this_cpu_has(X86_FEATURE_SME_COHERENT) || npages == 0 ||
+   pages == NULL)
return;
 
for (i = 0; i < npages; i++) {
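[The hunk is truncated here. For context, a hedged sketch of the loop the hunk breaks off in: each pinned guest page is mapped and its cache lines flushed so no stale unencrypted (C=0) data survives. Not part of this diff:]

	for (i = 0; i < npages; i++) {
		page_virtual = kmap_atomic(pages[i]);
		clflush_cache_range(page_virtual, PAGE_SIZE);
		kunmap_atomic(page_virtual);
	}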


[tip: x86/cpu] x86/cpu: Add hardware-enforced cache coherency as a CPUID feature

2020-09-18 Thread tip-bot2 for Krish Sadhukhan
The following commit has been merged into the x86/cpu branch of tip:

Commit-ID: 5866e9205b47a983a77ebc8654949f696342f2ab
Gitweb:
https://git.kernel.org/tip/5866e9205b47a983a77ebc8654949f696342f2ab
Author:Krish Sadhukhan 
AuthorDate:Thu, 17 Sep 2020 21:20:36 
Committer: Borislav Petkov 
CommitterDate: Fri, 18 Sep 2020 10:46:41 +02:00

x86/cpu: Add hardware-enforced cache coherency as a CPUID feature

In some hardware implementations, coherency between the encrypted and
unencrypted mappings of the same physical page is enforced. In such a system,
it is not required for software to flush the page from all CPU caches in the
system prior to changing the value of the C-bit for a page. This hardware-
enforced cache coherency is indicated by EAX[10] in CPUID leaf 0x8000001f.

 [ bp: Use one of the free slots in word 3. ]

Suggested-by: Tom Lendacky 
Signed-off-by: Krish Sadhukhan 
Signed-off-by: Borislav Petkov 
Link: https://lkml.kernel.org/r/20200917212038.5090-2-krish.sadhuk...@oracle.com
---
 arch/x86/include/asm/cpufeatures.h | 2 +-
 arch/x86/kernel/cpu/scattered.c| 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/cpufeatures.h 
b/arch/x86/include/asm/cpufeatures.h
index 83fc9d3..50b2a8d 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -96,7 +96,7 @@
 #define X86_FEATURE_SYSCALL32  ( 3*32+14) /* "" syscall in IA32 
userspace */
 #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 
userspace */
 #define X86_FEATURE_REP_GOOD   ( 3*32+16) /* REP microcode works well 
*/
-/* free( 3*32+17) */
+#define X86_FEATURE_SME_COHERENT   ( 3*32+17) /* "" AMD hardware-enforced 
cache coherency */
 #define X86_FEATURE_LFENCE_RDTSC   ( 3*32+18) /* "" LFENCE synchronizes 
RDTSC */
 #define X86_FEATURE_ACC_POWER  ( 3*32+19) /* AMD Accumulated Power 
Mechanism */
 #define X86_FEATURE_NOPL   ( 3*32+20) /* The NOPL (0F 1F) 
instructions */
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 62b137c..3221b71 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -41,6 +41,7 @@ static const struct cpuid_bit cpuid_bits[] = {
	{ X86_FEATURE_MBA,  CPUID_EBX,  6, 0x80000008, 0 },
	{ X86_FEATURE_SME,  CPUID_EAX,  0, 0x8000001f, 0 },
	{ X86_FEATURE_SEV,  CPUID_EAX,  1, 0x8000001f, 0 },
+   { X86_FEATURE_SME_COHERENT, CPUID_EAX, 10, 0x8000001f, 0 },
{ 0, 0, 0, 0, 0 }
 };
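[For reference, a hedged sketch of the raw check that the scattered-bit entry above encodes: X86_FEATURE_SME_COHERENT corresponds to bit 10 of EAX from CPUID leaf 0x8000001f. Function and header choices below are illustrative only:]

#include <linux/bits.h>
#include <linux/types.h>
#include <asm/processor.h>

static bool sme_coherent_supported(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* 0x8000001f: AMD memory encryption capabilities leaf */
	cpuid(0x8000001f, &eax, &ebx, &ecx, &edx);

	return eax & BIT(10);	/* hardware-enforced cache coherency */
}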
 


[tip: x86/cpu] x86/mm/pat: Don't flush cache if hardware enforces cache coherency across encryption domains

2020-09-18 Thread tip-bot2 for Krish Sadhukhan
The following commit has been merged into the x86/cpu branch of tip:

Commit-ID: 75d1cc0e05af579301ce4e49cf6399be4b4e6e76
Gitweb:
https://git.kernel.org/tip/75d1cc0e05af579301ce4e49cf6399be4b4e6e76
Author:Krish Sadhukhan 
AuthorDate:Thu, 17 Sep 2020 21:20:37 
Committer: Borislav Petkov 
CommitterDate: Fri, 18 Sep 2020 10:47:00 +02:00

x86/mm/pat: Don't flush cache if hardware enforces cache coherency across 
encryption domains

In some hardware implementations, coherency between the encrypted and
unencrypted mappings of the same physical page is enforced. In such a
system, it is not required for software to flush the page from all CPU
caches in the system prior to changing the value of the C-bit for the
page. So check that bit before flushing the cache.

 [ bp: Massage commit message. ]

Suggested-by: Tom Lendacky 
Signed-off-by: Krish Sadhukhan 
Signed-off-by: Borislav Petkov 
Link: https://lkml.kernel.org/r/20200917212038.5090-3-krish.sadhuk...@oracle.com
---
 arch/x86/mm/pat/set_memory.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index d1b2a88..40baa90 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -1999,7 +1999,7 @@ static int __set_memory_enc_dec(unsigned long addr, int 
numpages, bool enc)
/*
 * Before changing the encryption attribute, we need to flush caches.
 */
-	cpa_flush(&cpa, 1);
+	cpa_flush(&cpa, !this_cpu_has(X86_FEATURE_SME_COHERENT));
 
 	ret = __change_page_attr_set_clr(&cpa, 1);
 


[tip: x86/cpu] x86/cpu: Add hardware-enforced cache coherency as a CPUID feature

2020-09-18 Thread tip-bot2 for Krish Sadhukhan
The following commit has been merged into the x86/cpu branch of tip:

Commit-ID: f1f325183519ba25370765607e2732d6edf53de1
Gitweb:
https://git.kernel.org/tip/f1f325183519ba25370765607e2732d6edf53de1
Author:Krish Sadhukhan 
AuthorDate:Thu, 17 Sep 2020 21:20:36 
Committer: Borislav Petkov 
CommitterDate: Fri, 18 Sep 2020 09:46:06 +02:00

x86/cpu: Add hardware-enforced cache coherency as a CPUID feature

In some hardware implementations, coherency between the encrypted and
unencrypted mappings of the same physical page is enforced. In such a system,
it is not required for software to flush the page from all CPU caches in the
system prior to changing the value of the C-bit for a page. This hardware-
enforced cache coherency is indicated by EAX[10] in CPUID leaf 0x8000001f.

Suggested-by: Tom Lendacky 
Signed-off-by: Krish Sadhukhan 
Signed-off-by: Borislav Petkov 
Link: https://lkml.kernel.org/r/20200917212038.5090-2-krish.sadhuk...@oracle.com
---
 arch/x86/include/asm/cpufeatures.h | 1 +
 arch/x86/kernel/cpu/scattered.c| 1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/x86/include/asm/cpufeatures.h 
b/arch/x86/include/asm/cpufeatures.h
index 83fc9d3..ba6e8f4 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -288,6 +288,7 @@
 #define X86_FEATURE_FENCE_SWAPGS_USER  (11*32+ 4) /* "" LFENCE in user entry 
SWAPGS path */
 #define X86_FEATURE_FENCE_SWAPGS_KERNEL(11*32+ 5) /* "" LFENCE in 
kernel entry SWAPGS path */
 #define X86_FEATURE_SPLIT_LOCK_DETECT  (11*32+ 6) /* #AC for split lock */
+#define X86_FEATURE_SME_COHERENT   (11*32+ 7) /* "" AMD hardware-enforced 
cache coherency */
 
 /* Intel-defined CPU features, CPUID level 0x0007:1 (EAX), word 12 */
 #define X86_FEATURE_AVX512_BF16(12*32+ 5) /* AVX512 BFLOAT16 
instructions */
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 62b137c..3221b71 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -41,6 +41,7 @@ static const struct cpuid_bit cpuid_bits[] = {
	{ X86_FEATURE_MBA,  CPUID_EBX,  6, 0x80000008, 0 },
	{ X86_FEATURE_SME,  CPUID_EAX,  0, 0x8000001f, 0 },
	{ X86_FEATURE_SEV,  CPUID_EAX,  1, 0x8000001f, 0 },
+   { X86_FEATURE_SME_COHERENT, CPUID_EAX, 10, 0x8000001f, 0 },
{ 0, 0, 0, 0, 0 }
 };
 


[tip: x86/cpu] x86/mm/pat: Don't flush cache if hardware enforces cache coherency across encryption domains

2020-09-18 Thread tip-bot2 for Krish Sadhukhan
The following commit has been merged into the x86/cpu branch of tip:

Commit-ID: 789521fca70ec8cb98f7257b880405e46f8a47a1
Gitweb:
https://git.kernel.org/tip/789521fca70ec8cb98f7257b880405e46f8a47a1
Author:Krish Sadhukhan 
AuthorDate:Thu, 17 Sep 2020 21:20:37 
Committer: Borislav Petkov 
CommitterDate: Fri, 18 Sep 2020 09:48:22 +02:00

x86/mm/pat: Don't flush cache if hardware enforces cache coherency across 
encryption domains

In some hardware implementations, coherency between the encrypted and
unencrypted mappings of the same physical page is enforced. In such a
system, it is not required for software to flush the page from all CPU
caches in the system prior to changing the value of the C-bit for the
page. So check that bit before flushing the cache.

 [ bp: Massage commit message. ]

Suggested-by: Tom Lendacky 
Signed-off-by: Krish Sadhukhan 
Signed-off-by: Borislav Petkov 
Link: https://lkml.kernel.org/r/20200917212038.5090-3-krish.sadhuk...@oracle.com
---
 arch/x86/mm/pat/set_memory.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index d1b2a88..40baa90 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -1999,7 +1999,7 @@ static int __set_memory_enc_dec(unsigned long addr, int 
numpages, bool enc)
/*
 * Before changing the encryption attribute, we need to flush caches.
 */
-	cpa_flush(&cpa, 1);
+	cpa_flush(&cpa, !this_cpu_has(X86_FEATURE_SME_COHERENT));
 
 	ret = __change_page_attr_set_clr(&cpa, 1);
 


[PATCH 3/3 v4] KVM: SVM: Don't flush cache if hardware enforces cache coherency across encryption domains

2020-09-17 Thread Krish Sadhukhan
In some hardware implementations, coherency between the encrypted and
unencrypted mappings of the same physical page in a VM is enforced. In such a
system, it is not required for software to flush the VM's page from all CPU
caches in the system prior to changing the value of the C-bit for the page.

Signed-off-by: Krish Sadhukhan 
---
 arch/x86/kvm/svm/sev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 7bf7bf734979..3c9a45efdd4d 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -384,7 +384,8 @@ static void sev_clflush_pages(struct page *pages[], 
unsigned long npages)
uint8_t *page_virtual;
unsigned long i;
 
-   if (npages == 0 || pages == NULL)
+   if (this_cpu_has(X86_FEATURE_SME_COHERENT) || npages == 0 ||
+   pages == NULL)
return;
 
for (i = 0; i < npages; i++) {
-- 
2.18.4



[PATCH 2/3 v4] x86: AMD: Don't flush cache if hardware enforces cache coherency across encryption domains

2020-09-17 Thread Krish Sadhukhan
In some hardware implementations, coherency between the encrypted and
unencrypted mappings of the same physical page is enforced. In such a system,
it is not required for software to flush the page from all CPU caches in the
system prior to changing the value of the C-bit for the page.

Suggested-by: Tom Lendacky 
Signed-off-by: Krish Sadhukhan 
---
 arch/x86/mm/pat/set_memory.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index d1b2a889f035..40baa90e74f4 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -1999,7 +1999,7 @@ static int __set_memory_enc_dec(unsigned long addr, int 
numpages, bool enc)
/*
 * Before changing the encryption attribute, we need to flush caches.
 */
-	cpa_flush(&cpa, 1);
+	cpa_flush(&cpa, !this_cpu_has(X86_FEATURE_SME_COHERENT));
 
 	ret = __change_page_attr_set_clr(&cpa, 1);
 
-- 
2.18.4



[PATCH 1/3 v4] x86: AMD: Add hardware-enforced cache coherency as a CPUID feature

2020-09-17 Thread Krish Sadhukhan
In some hardware implementations, coherency between the encrypted and
unencrypted mappings of the same physical page is enforced. In such a system,
it is not required for software to flush the page from all CPU caches in the
system prior to changing the value of the C-bit for a page. This hardware-
enforced cache coherency is indicated by EAX[10] in CPUID leaf 0x8000001f.

Suggested-by: Tom Lendacky 
Signed-off-by: Krish Sadhukhan 
---
 arch/x86/include/asm/cpufeatures.h | 1 +
 arch/x86/kernel/cpu/scattered.c| 1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/x86/include/asm/cpufeatures.h 
b/arch/x86/include/asm/cpufeatures.h
index 2901d5df4366..c3fada5f5f71 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -288,6 +288,7 @@
 #define X86_FEATURE_FENCE_SWAPGS_USER  (11*32+ 4) /* "" LFENCE in user entry 
SWAPGS path */
 #define X86_FEATURE_FENCE_SWAPGS_KERNEL(11*32+ 5) /* "" LFENCE in 
kernel entry SWAPGS path */
 #define X86_FEATURE_SPLIT_LOCK_DETECT  (11*32+ 6) /* #AC for split lock */
+#define X86_FEATURE_SME_COHERENT   (11*32+ 7) /* "" AMD hardware-enforced 
cache coherency */
 
 /* Intel-defined CPU features, CPUID level 0x0007:1 (EAX), word 12 */
 #define X86_FEATURE_AVX512_BF16(12*32+ 5) /* AVX512 BFLOAT16 
instructions */
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 62b137c3c97a..0bc2668f22e6 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -41,6 +41,7 @@ static const struct cpuid_bit cpuid_bits[] = {
	{ X86_FEATURE_MBA,  CPUID_EBX,  6, 0x80000008, 0 },
	{ X86_FEATURE_SME,  CPUID_EAX,  0, 0x8000001f, 0 },
	{ X86_FEATURE_SEV,  CPUID_EAX,  1, 0x8000001f, 0 },
+   { X86_FEATURE_SME_COHERENT, CPUID_EAX,  10, 0x8000001f, 0 },
{ 0, 0, 0, 0, 0 }
 };
 
-- 
2.18.4



[PATCH 0/3 v4] x86: AMD: Don't flush cache if hardware enforces cache coherency across encryption domains

2020-09-17 Thread Krish Sadhukhan
v3 -> v4:
1. Patch# 1 from v3 has been dropped.
2. The CPUID feature for hardware-enforced cache coherency has been
   renamed.

[PATCH 1/3 v4] x86: AMD: Add hardware-enforced cache coherency as a
[PATCH 2/3 v4] x86: AMD: Don't flush cache if hardware enforces cache
[PATCH 3/3 v4] KVM: SVM: Don't flush cache if hardware enforces cache

 arch/x86/include/asm/cpufeatures.h | 1 +
 arch/x86/kernel/cpu/scattered.c| 1 +
 arch/x86/kvm/svm/sev.c | 3 ++-
 arch/x86/mm/pat/set_memory.c   | 2 +-
 4 files changed, 5 insertions(+), 2 deletions(-)

Krish Sadhukhan (3):
  x86: AMD: Add hardware-enforced cache coherency as a CPUID feature
  x86: AMD: Don't flush cache if hardware enforces cache coherency across 
encryption domains
  KVM: SVM: Don't flush cache if hardware enforces cache coherency across 
encryption domains



Re: [PATCH] KVM: SVM: Get rid of the variable 'exit_fastpath'

2020-09-15 Thread Krish Sadhukhan



On 9/15/20 4:30 AM, lihaiwei.ker...@gmail.com wrote:

From: Haiwei Li 

'exit_fastpath' isn't used anywhere else, so remove it.

Suggested-by: Krish Sadhukhan 
Signed-off-by: Haiwei Li 
---
  arch/x86/kvm/svm/svm.c | 4 +---
  1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index c44f3e9..6e88658 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3413,7 +3413,6 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu 
*vcpu,
  
  static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)

  {
-   fastpath_t exit_fastpath;
struct vcpu_svm *svm = to_svm(vcpu);
  
  	svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];

@@ -3536,8 +3535,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu 
*vcpu)
svm_handle_mce(svm);
  
  	svm_complete_interrupts(svm);

-   exit_fastpath = svm_exit_handlers_fastpath(vcpu);
-   return exit_fastpath;
+   return svm_exit_handlers_fastpath(vcpu);
  }
  
  static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long root,

Reviewed-by: Krish Sadhukhan 


Re: [PATCH] KVM: SVM: Analyze is_guest_mode() in svm_vcpu_run()

2020-09-14 Thread Krish Sadhukhan



On 9/13/20 11:55 PM, Wanpeng Li wrote:

From: Wanpeng Li 

Analyze is_guest_mode() in svm_vcpu_run() instead of 
svm_exit_handlers_fastpath()
in conformity with VMX version.

Suggested-by: Vitaly Kuznetsov 
Signed-off-by: Wanpeng Li 
---
  arch/x86/kvm/svm/svm.c | 7 +--
  1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 3da5b2f..009035a 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3393,8 +3393,7 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu)
  
  static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)

  {
-   if (!is_guest_mode(vcpu) &&
-   to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
+   if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
to_svm(vcpu)->vmcb->control.exit_info_1)
return handle_fastpath_set_msr_irqoff(vcpu);
  
@@ -3580,6 +3579,10 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)

svm_handle_mce(svm);
  
  	svm_complete_interrupts(svm);

+
+   if (is_guest_mode(vcpu))
+   return EXIT_FASTPATH_NONE;
+
exit_fastpath = svm_exit_handlers_fastpath(vcpu);
return exit_fastpath;


Not related to your changes, but should we get rid of the variable 
'exit_fastpath' and just do,


        return svm_exit_handlers_fastpath(vcpu);

It seems the variable isn't used anywhere else and svm_vcpu_run() 
doesn't return from anywhere else either.


Also, svm_exit_handlers_fastpath() doesn't have any other caller.  
Should we get rid of it as well ?



For your changes,

    Reviewed-by: Krish Sadhukhan 


  }


Re: [PATCH 2/4 v3] x86: AMD: Add hardware-enforced cache coherency as a CPUID feature

2020-09-11 Thread Krish Sadhukhan



On 9/11/20 12:36 PM, Dave Hansen wrote:

On 9/11/20 12:25 PM, Krish Sadhukhan wrote:

diff --git a/arch/x86/include/asm/cpufeatures.h 
b/arch/x86/include/asm/cpufeatures.h
index 81335e6fe47d..0e5b27ee5931 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -293,6 +293,7 @@
  #define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry 
SWAPGS path */
  #define X86_FEATURE_FENCE_SWAPGS_KERNEL   (11*32+ 5) /* "" LFENCE in 
kernel entry SWAPGS path */
  #define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */
+#define X86_FEATURE_HW_CACHE_COHERENCY (11*32+ 7) /* AMD hardware-enforced 
cache coherency */

That's an awfully generic name.  We generally have "hardware-enforced
cache coherency" already everywhere. :)

This probably needs to say something about encryption, or even SEV
specifically.



How about X86_FEATURE_ENC_CACHE_COHERENCY ?


I also don't see this bit in the "AMD64 Architecture
Programmer’s Manual".  Did I look in the wrong spot somehow?

Section 7.10.6 in APM mentions this.


[PATCH 2/4 v3] x86: AMD: Add hardware-enforced cache coherency as a CPUID feature

2020-09-11 Thread Krish Sadhukhan
In some hardware implementations, coherency between the encrypted and
unencrypted mappings of the same physical page is enforced. In such a system,
it is not required for software to flush the page from all CPU caches in the
system prior to changing the value of the C-bit for a page. This hardware-
enforced cache coherency is indicated by EAX[10] in CPUID leaf 0x8000001f.

Suggested-by: Tom Lendacky 
Signed-off-by: Krish Sadhukhan 
---
 arch/x86/include/asm/cpufeatures.h | 1 +
 arch/x86/kernel/cpu/scattered.c| 1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/x86/include/asm/cpufeatures.h 
b/arch/x86/include/asm/cpufeatures.h
index 81335e6fe47d..0e5b27ee5931 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -293,6 +293,7 @@
 #define X86_FEATURE_FENCE_SWAPGS_USER  (11*32+ 4) /* "" LFENCE in user entry 
SWAPGS path */
 #define X86_FEATURE_FENCE_SWAPGS_KERNEL(11*32+ 5) /* "" LFENCE in 
kernel entry SWAPGS path */
 #define X86_FEATURE_SPLIT_LOCK_DETECT  (11*32+ 6) /* #AC for split lock */
+#define X86_FEATURE_HW_CACHE_COHERENCY (11*32+ 7) /* AMD hardware-enforced 
cache coherency */
 
 /* Intel-defined CPU features, CPUID level 0x0007:1 (EAX), word 12 */
 #define X86_FEATURE_AVX512_BF16(12*32+ 5) /* AVX512 BFLOAT16 
instructions */
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 033c112e03fc..57394fee1d35 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -41,6 +41,7 @@ static const struct cpuid_bit cpuid_bits[] = {
	{ X86_FEATURE_MBA,  CPUID_EBX,  6, 0x80000008, 0 },
{ X86_FEATURE_SME,  CPUID_EAX,  0, CPUID_AMD_SME, 0 },
{ X86_FEATURE_SEV,  CPUID_EAX,  1, CPUID_AMD_SME, 0 },
+   { X86_FEATURE_HW_CACHE_COHERENCY, CPUID_EAX,  10, CPUID_AMD_SME, 0 },
{ 0, 0, 0, 0, 0 }
 };
 
-- 
2.18.4



[PATCH 0/4 v3] x86: AMD: Don't flush cache if hardware enforces cache coherency across encryption domains

2020-09-11 Thread Krish Sadhukhan
In some hardware implementations, coherency between the encrypted and
unencrypted mappings of the same physical page is enforced. In such a system,
it is not required for software to flush the page from all CPU caches in the
system prior to changing the value of the C-bit for a page. This hardware-
enforced cache coherency is indicated by EAX[10] in CPUID leaf 0x8000001f.

Add this as a CPUID feature and skip flushing caches if the feature is present.

v2 -> v3:
Patch# 2: Moves the addition of the CPUID feature from 
  early_detect_mem_encrypt() to scattered.c.
Patch# 3,4: These two are the split of patch# 3 from v2. Patch# 3
	    is for non-SEV encryptions while patch# 4 is for SEV
	    encryptions.

[PATCH 1/4 v3] x86: AMD: Replace numeric value for SME CPUID leaf with a
[PATCH 2/4 v3] x86: AMD: Add hardware-enforced cache coherency as a
[PATCH 3/4 v3] x86: AMD: Don't flush cache if hardware enforces cache
[PATCH 4/4 v3] KVM: SVM: Don't flush cache if hardware enforces cache

 arch/x86/boot/compressed/mem_encrypt.S | 5 +++--
 arch/x86/include/asm/cpufeatures.h | 6 ++
 arch/x86/kernel/cpu/amd.c  | 2 +-
 arch/x86/kernel/cpu/scattered.c| 5 +++--
 arch/x86/kvm/cpuid.c   | 2 +-
 arch/x86/kvm/svm/sev.c | 3 ++-
 arch/x86/kvm/svm/svm.c | 4 ++--
 arch/x86/mm/mem_encrypt_identity.c | 4 ++--
 arch/x86/mm/pat/set_memory.c   | 2 +-
 9 files changed, 21 insertions(+), 12 deletions(-)

Krish Sadhukhan (4):
  x86: AMD: Replace numeric value for SME CPUID leaf with a #define
  x86: AMD: Add hardware-enforced cache coherency as a CPUID feature
  x86: AMD: Don't flush cache if hardware enforces cache coherency across
    encryption domains
  KVM: SVM: Don't flush cache if hardware enforces cache coherency across
    encryption domains



[PATCH 3/4 v3] x86: AMD: Don't flush cache if hardware enforces cache coherency across encryption domains

2020-09-11 Thread Krish Sadhukhan
In some hardware implementations, coherency between the encrypted and
unencrypted mappings of the same physical page is enforced. In such a system,
it is not required for software to flush the page from all CPU caches in the
system prior to changing the value of the C-bit for the page.

Suggested-by: Tom Lendacky 
Signed-off-by: Krish Sadhukhan 
---
 arch/x86/mm/pat/set_memory.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index d1b2a889f035..78d5511c5edd 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -1999,7 +1999,7 @@ static int __set_memory_enc_dec(unsigned long addr, int 
numpages, bool enc)
/*
 * Before changing the encryption attribute, we need to flush caches.
 */
-	cpa_flush(&cpa, 1);
+	cpa_flush(&cpa, !this_cpu_has(X86_FEATURE_HW_CACHE_COHERENCY));
 
 	ret = __change_page_attr_set_clr(&cpa, 1);
 
-- 
2.18.4



[PATCH 1/4 v3] x86: AMD: Replace numeric value for SME CPUID leaf with a #define

2020-09-11 Thread Krish Sadhukhan
Signed-off-by: Krish Sadhukhan 
---
 arch/x86/boot/compressed/mem_encrypt.S | 5 +++--
 arch/x86/include/asm/cpufeatures.h | 5 +
 arch/x86/kernel/cpu/amd.c  | 2 +-
 arch/x86/kernel/cpu/scattered.c| 4 ++--
 arch/x86/kvm/cpuid.c   | 2 +-
 arch/x86/kvm/svm/svm.c | 4 ++--
 arch/x86/mm/mem_encrypt_identity.c | 4 ++--
 7 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/arch/x86/boot/compressed/mem_encrypt.S 
b/arch/x86/boot/compressed/mem_encrypt.S
index dd07e7b41b11..22e30b0c0d19 100644
--- a/arch/x86/boot/compressed/mem_encrypt.S
+++ b/arch/x86/boot/compressed/mem_encrypt.S
@@ -12,6 +12,7 @@
 #include 
 #include 
 #include 
+#include <asm/cpufeatures.h>
 
.text
.code32
@@ -31,7 +32,7 @@ SYM_FUNC_START(get_sev_encryption_bit)
 
	movl	$0x80000000, %eax	/* CPUID to check the highest leaf */
	cpuid
-	cmpl	$0x8000001f, %eax	/* See if 0x8000001f is available */
+	cmpl	$CPUID_AMD_SME, %eax	/* See if 0x8000001f is available */
jb  .Lno_sev
 
/*
@@ -40,7 +41,7 @@ SYM_FUNC_START(get_sev_encryption_bit)
 *   CPUID Fn8000_001F[EBX] - Bits 5:0
 * Pagetable bit position used to indicate encryption
 */
-	movl	$0x8000001f, %eax
+	movl	$CPUID_AMD_SME, %eax
cpuid
bt  $1, %eax/* Check if SEV is available */
jnc .Lno_sev
diff --git a/arch/x86/include/asm/cpufeatures.h 
b/arch/x86/include/asm/cpufeatures.h
index 2901d5df4366..81335e6fe47d 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -10,6 +10,11 @@
 #include 
 #endif
 
+/*
+ * AMD CPUID functions
+ */
+#define CPUID_AMD_SME	0x8000001f	/* Secure Memory Encryption */
+
 /*
  * Defines x86 CPU feature bits
  */
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index dcc3d943c68f..4507ededb978 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -630,7 +630,7 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
 * will be a value above 32-bits this is still done for
 * CONFIG_X86_32 so that accurate values are reported.
 */
-	c->x86_phys_bits -= (cpuid_ebx(0x8000001f) >> 6) & 0x3f;
+   c->x86_phys_bits -= (cpuid_ebx(CPUID_AMD_SME) >> 6) & 0x3f;
 
if (IS_ENABLED(CONFIG_X86_32))
goto clear_all;
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 62b137c3c97a..033c112e03fc 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -39,8 +39,8 @@ static const struct cpuid_bit cpuid_bits[] = {
	{ X86_FEATURE_CPB,  CPUID_EDX,  9, 0x80000007, 0 },
	{ X86_FEATURE_PROC_FEEDBACK,CPUID_EDX, 11, 0x80000007, 0 },
	{ X86_FEATURE_MBA,  CPUID_EBX,  6, 0x80000008, 0 },
-	{ X86_FEATURE_SME,  CPUID_EAX,  0, 0x8000001f, 0 },
-	{ X86_FEATURE_SEV,  CPUID_EAX,  1, 0x8000001f, 0 },
+   { X86_FEATURE_SME,  CPUID_EAX,  0, CPUID_AMD_SME, 0 },
+   { X86_FEATURE_SEV,  CPUID_EAX,  1, CPUID_AMD_SME, 0 },
{ 0, 0, 0, 0, 0 }
 };
 
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 3fd6eec202d7..95863e767d3d 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -756,7 +756,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array 
*array, u32 function)
entry->edx = 0;
break;
	case 0x80000000:
-		entry->eax = min(entry->eax, 0x8000001f);
+		entry->eax = min(entry->eax, CPUID_AMD_SME);
		break;
	case 0x80000001:
cpuid_entry_override(entry, CPUID_8000_0001_EDX);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 0194336b64a4..a4e92ae399b4 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -749,7 +749,7 @@ static __init void svm_adjust_mmio_mask(void)
u64 msr, mask;
 
/* If there is no memory encryption support, use existing mask */
-	if (cpuid_eax(0x80000000) < 0x8000001f)
+	if (cpuid_eax(0x80000000) < CPUID_AMD_SME)
return;
 
/* If memory encryption is not enabled, use existing mask */
@@ -757,7 +757,7 @@ static __init void svm_adjust_mmio_mask(void)
if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
return;
 
-	enc_bit = cpuid_ebx(0x8000001f) & 0x3f;
+   enc_bit = cpuid_ebx(CPUID_AMD_SME) & 0x3f;
mask_bit = boot_cpu_data.x86_phys_bits;
 
/* Increment the mask bit if it is the same as the encryption bit */
diff --git a/arch/x86/mm/mem_encrypt_identity.c 
b/arch/x86/mm/mem_encrypt_identity.c
index e2b0e2ac07bb..cbe600dd357b 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c

[PATCH 4/4 v3] KVM: SVM: Don't flush cache if hardware enforces cache coherency across encryption domains

2020-09-11 Thread Krish Sadhukhan
In some hardware implementations, coherency between the encrypted and
unencrypted mappings of the same physical page in a VM is enforced. In such a
system, it is not required for software to flush the VM's page from all CPU
caches in the system prior to changing the value of the C-bit for the page.

Signed-off-by: Krish Sadhukhan 
---
 arch/x86/kvm/svm/sev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 402dc4234e39..8aa2209f2637 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -384,7 +384,8 @@ static void sev_clflush_pages(struct page *pages[], 
unsigned long npages)
uint8_t *page_virtual;
unsigned long i;
 
-   if (npages == 0 || pages == NULL)
+   if (this_cpu_has(X86_FEATURE_HW_CACHE_COHERENCY) || npages == 0 ||
+   pages == NULL)
return;
 
for (i = 0; i < npages; i++) {
-- 
2.18.4



Re: [PATCH] KVM: SVM: Mark SEV launch secret pages as dirty.

2020-08-07 Thread Krish Sadhukhan



On 8/6/20 6:23 PM, Cfir Cohen wrote:

The LAUNCH_SECRET command performs encryption of the
launch secret memory contents. Mark pinned pages as
dirty, before unpinning them.
This matches the logic in sev_launch_update().

sev_launch_update_data() instead of sev_launch_update() ?


Signed-off-by: Cfir Cohen 
---
  arch/x86/kvm/svm/sev.c | 15 ++-
  1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 5573a97f1520..37c47d26b9f7 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -850,7 +850,7 @@ static int sev_launch_secret(struct kvm *kvm, struct 
kvm_sev_cmd *argp)
struct kvm_sev_launch_secret params;
struct page **pages;
void *blob, *hdr;
-   unsigned long n;
+   unsigned long n, i;
int ret, offset;
  
  	if (!sev_guest(kvm))

@@ -863,6 +863,14 @@ static int sev_launch_secret(struct kvm *kvm, struct 
kvm_sev_cmd *argp)
if (!pages)
return -ENOMEM;
  
+	/*

+* The LAUNCH_SECRET command will perform in-place encryption of the
+* memory content (i.e it will write the same memory region with C=1).
+* It's possible that the cache may contain the data with C=0, i.e.,
+* unencrypted so invalidate it first.
+*/
+   sev_clflush_pages(pages, n);
+
/*
 * The secret must be copied into contiguous memory region, lets verify
 * that userspace memory pages are contiguous before we issue command.
@@ -908,6 +916,11 @@ static int sev_launch_secret(struct kvm *kvm, struct 
kvm_sev_cmd *argp)
  e_free:
kfree(data);
  e_unpin_memory:
+   /* content of memory is updated, mark pages dirty */
+   for (i = 0; i < n; i++) {
+   set_page_dirty_lock(pages[i]);
+   mark_page_accessed(pages[i]);
+   }
sev_unpin_memory(kvm, pages, n);
return ret;
  }

Reviewed-by: Krish Sadhukhan 


Re: [PATCH 4/4] KVM: SVM: Rename svm_nested_virtualize_tpr() to nested_svm_virtualize_tpr()

2020-06-25 Thread Krish Sadhukhan



On 6/25/20 1:03 AM, Joerg Roedel wrote:

From: Joerg Roedel 

Match the naming with other nested svm functions.

No functional changes.

Signed-off-by: Joerg Roedel 
---
  arch/x86/kvm/svm/svm.c | 6 +++---
  arch/x86/kvm/svm/svm.h | 2 +-
  2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index e0b0321d95d4..b0d551bcf2a0 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3040,7 +3040,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, 
int tpr, int irr)
  {
struct vcpu_svm *svm = to_svm(vcpu);
  
-	if (svm_nested_virtualize_tpr(vcpu))

+   if (nested_svm_virtualize_tpr(vcpu))
return;
  
  	clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);

@@ -3234,7 +3234,7 @@ static inline void sync_cr8_to_lapic(struct kvm_vcpu 
*vcpu)
  {
struct vcpu_svm *svm = to_svm(vcpu);
  
-	if (svm_nested_virtualize_tpr(vcpu))

+   if (nested_svm_virtualize_tpr(vcpu))
return;
  
  	if (!is_cr_intercept(svm, INTERCEPT_CR8_WRITE)) {

@@ -3248,7 +3248,7 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu 
*vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
u64 cr8;
  
-	if (svm_nested_virtualize_tpr(vcpu) ||

+   if (nested_svm_virtualize_tpr(vcpu) ||
kvm_vcpu_apicv_active(vcpu))
return;
  
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h

index 003e89008331..8907efda0b0a 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -365,7 +365,7 @@ void svm_set_gif(struct vcpu_svm *svm, bool value);
  #define NESTED_EXIT_DONE  1   /* Exit caused nested vmexit  */
  #define NESTED_EXIT_CONTINUE  2   /* Further checks needed  */
  
-static inline bool svm_nested_virtualize_tpr(struct kvm_vcpu *vcpu)

+static inline bool nested_svm_virtualize_tpr(struct kvm_vcpu *vcpu)
  {
struct vcpu_svm *svm = to_svm(vcpu);
  
Should the name be changed to svm_nested_virtualized_tpr ?  Because 
svm_nested_virtualize_tpr implies that the action of the function is to 
virtualize the TPR.


Re: [PATCH 0/4] KVM: SVM: Code move follow-up

2020-06-25 Thread Krish Sadhukhan



On 6/25/20 1:03 AM, Joerg Roedel wrote:

From: Joerg Roedel 

Hi,

here is small series to follow-up on the review comments for moving
the kvm-amd module code to its own sub-directory. The comments were
only about renaming structs and symbols, so there are no functional
changes in these patches.

The comments addressed here are all from [1].

Regards,

Joerg

[1] https://lore.kernel.org/lkml/87d0917ezq@vitty.brq.redhat.com/

Joerg Roedel (4):
   KVM: SVM: Rename struct nested_state to svm_nested_state
   KVM: SVM: Add vmcb_ prefix to mark_*() functions
   KVM: SVM: Add svm_ prefix to set/clr/is_intercept()
   KVM: SVM: Rename svm_nested_virtualize_tpr() to
 nested_svm_virtualize_tpr()

  arch/x86/kvm/svm/avic.c   |   2 +-
  arch/x86/kvm/svm/nested.c |   8 +--
  arch/x86/kvm/svm/sev.c|   2 +-
  arch/x86/kvm/svm/svm.c| 138 +++---
  arch/x86/kvm/svm/svm.h|  20 +++---
  5 files changed, 85 insertions(+), 85 deletions(-)


Reviewed-by: Krish Sadhukhan 


Re: [PATCH 30/30] KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE

2020-06-01 Thread Krish Sadhukhan



On 5/29/20 8:39 AM, Paolo Bonzini wrote:

Similar to VMX, the state that is captured through the currently available
IOCTLs is a mix of L1 and L2 state, dependent on whether the L2 guest was
running at the moment when the process was interrupted to save its state.

In particular, the SVM-specific state for nested virtualization includes
the L1 saved state (including the interrupt flag), the cached L2 controls,
and the GIF.

Signed-off-by: Paolo Bonzini 
---
  arch/x86/include/uapi/asm/kvm.h |  17 +++-
  arch/x86/kvm/cpuid.h|   5 ++
  arch/x86/kvm/svm/nested.c   | 147 
  arch/x86/kvm/svm/svm.c  |   1 +
  arch/x86/kvm/vmx/nested.c   |   5 --
  arch/x86/kvm/x86.c  |   3 +-
  6 files changed, 171 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 3f3f780c8c65..12075a9de1c1 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -385,18 +385,22 @@ struct kvm_sync_regs {
  #define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4)
  
  #define KVM_STATE_NESTED_FORMAT_VMX	0

-#define KVM_STATE_NESTED_FORMAT_SVM1   /* unused */
+#define KVM_STATE_NESTED_FORMAT_SVM1
  
  #define KVM_STATE_NESTED_GUEST_MODE	0x0001

  #define KVM_STATE_NESTED_RUN_PENDING  0x0002
  #define KVM_STATE_NESTED_EVMCS0x0004
  #define KVM_STATE_NESTED_MTF_PENDING  0x0008
+#define KVM_STATE_NESTED_GIF_SET   0x0100
  
  #define KVM_STATE_NESTED_SMM_GUEST_MODE	0x0001

  #define KVM_STATE_NESTED_SMM_VMXON0x0002
  
  #define KVM_STATE_NESTED_VMX_VMCS_SIZE	0x1000
  
+#define KVM_STATE_NESTED_SVM_VMCB_SIZE	0x1000

+
+
  struct kvm_vmx_nested_state_data {
__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
@@ -411,6 +415,15 @@ struct kvm_vmx_nested_state_hdr {
} smm;
  };
  
+struct kvm_svm_nested_state_data {

+   /* Save area only used if KVM_STATE_NESTED_RUN_PENDING.  */
+   __u8 vmcb12[KVM_STATE_NESTED_SVM_VMCB_SIZE];
+};
+
+struct kvm_svm_nested_state_hdr {
+   __u64 vmcb_pa;
+};
+
  /* for KVM_CAP_NESTED_STATE */
  struct kvm_nested_state {
__u16 flags;
@@ -419,6 +432,7 @@ struct kvm_nested_state {
  
  	union {

struct kvm_vmx_nested_state_hdr vmx;
+   struct kvm_svm_nested_state_hdr svm;
  
  		/* Pad the header to 128 bytes.  */

__u8 pad[120];
@@ -431,6 +445,7 @@ struct kvm_nested_state {
 */
union {
struct kvm_vmx_nested_state_data vmx[0];
+   struct kvm_svm_nested_state_data svm[0];
} data;
  };
  
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h

index 63a70f6a3df3..05434cd9342f 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -303,4 +303,9 @@ static __always_inline void 
kvm_cpu_cap_check_and_set(unsigned int x86_feature)
kvm_cpu_cap_set(x86_feature);
  }
  
+static inline bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa)

+{
+   return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu));
+}
+
  #endif
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index c712fe577029..6b1049148c1b 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -25,6 +25,7 @@
  #include "trace.h"
  #include "mmu.h"
  #include "x86.h"
+#include "cpuid.h"
  #include "lapic.h"
  #include "svm.h"
  
@@ -238,6 +239,8 @@ static void load_nested_vmcb_control(struct vcpu_svm *svm,

  {
	copy_vmcb_control_area(&svm->nested.ctl, control);
  
+	/* Copy it here because nested_svm_check_controls will check it.  */

+   svm->nested.ctl.asid   = control->asid;
svm->nested.ctl.msrpm_base_pa &= ~0x0fffULL;
svm->nested.ctl.iopm_base_pa  &= ~0x0fffULL;
  }
@@ -930,6 +933,150 @@ int nested_svm_exit_special(struct vcpu_svm *svm)
return NESTED_EXIT_CONTINUE;
  }
  
+static int svm_get_nested_state(struct kvm_vcpu *vcpu,

+   struct kvm_nested_state __user 
*user_kvm_nested_state,
+   u32 user_data_size)
+{
+   struct vcpu_svm *svm;
+   struct kvm_nested_state kvm_state = {
+   .flags = 0,
+   .format = KVM_STATE_NESTED_FORMAT_SVM,
+   .size = sizeof(kvm_state),
+   };
+   struct vmcb __user *user_vmcb = (struct vmcb __user *)
+		&user_kvm_nested_state->data.svm[0];
+
+   if (!vcpu)
+   return kvm_state.size + KVM_STATE_NESTED_SVM_VMCB_SIZE;
+
+   svm = to_svm(vcpu);
+
+   if (user_data_size < kvm_state.size)
+   goto out;
+
+   /* First fill in the header and copy it out.  */
+   if (is_guest_mode(vcpu)) {
+   kvm_state.hdr.svm.vmcb_pa = svm->nested.vmcb;
+   kvm_state.size += KVM_STATE_NESTED_SVM_VMCB_SIZE;
+   kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE;

Re: [PATCH 25/30] KVM: nSVM: leave guest mode when clearing EFER.SVME

2020-05-31 Thread Krish Sadhukhan



On 5/29/20 8:39 AM, Paolo Bonzini wrote:

According to the AMD manual, the effect of turning off EFER.SVME while a
guest is running is undefined.  We make it leave guest mode immediately,
similar to the effect of clearing the VMX bit in MSR_IA32_FEAT_CTL.



I see that svm_set_efer() is called in enter_svm_guest_mode() and 
nested_svm_vmexit(). In the VMRUN path, we have already checked 
EFER.SVME in nested_vmcb_checks(). So if it was not set, we wouldn't 
come to enter_svm_guest_mode(). Your fix is only for the #VMEXIT path then ?




Signed-off-by: Paolo Bonzini 
---
  arch/x86/kvm/svm/nested.c | 16 
  arch/x86/kvm/svm/svm.c| 10 --
  arch/x86/kvm/svm/svm.h|  1 +
  3 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index bd3a89cd4070..369eca73fe3e 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -618,6 +618,22 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
return 0;
  }
  
+/*

+ * Forcibly leave nested mode in order to be able to reset the VCPU later on.
+ */
+void svm_leave_nested(struct vcpu_svm *svm)
+{
+	if (is_guest_mode(&svm->vcpu)) {
+		struct vmcb *hsave = svm->nested.hsave;
+		struct vmcb *vmcb = svm->vmcb;
+
+		svm->nested.nested_run_pending = 0;
+		leave_guest_mode(&svm->vcpu);
+		copy_vmcb_control_area(&vmcb->control, &hsave->control);
+		nested_svm_uninit_mmu_context(&svm->vcpu);
+   }
+}
+
  static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
  {
u32 offset, msr, value;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index bc08221f6743..b4db9a980469 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -265,6 +265,7 @@ static int get_npt_level(struct kvm_vcpu *vcpu)
  
  void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)

  {
+   struct vcpu_svm *svm = to_svm(vcpu);
vcpu->arch.efer = efer;
  
  	if (!npt_enabled) {

@@ -275,8 +276,13 @@ void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
efer &= ~EFER_LME;
}
  
-	to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;

-   mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
+   if (!(efer & EFER_SVME)) {
+   svm_leave_nested(svm);
+   svm_set_gif(svm, true);
+   }
+
+   svm->vmcb->save.efer = efer | EFER_SVME;
+   mark_dirty(svm->vmcb, VMCB_CR);
  }
  
  static int is_external_interrupt(u32 info)

diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index be8e830f83fa..6ac4c00a5d82 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -389,6 +389,7 @@ static inline bool nested_exit_on_nmi(struct vcpu_svm *svm)
  
  void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,

  struct vmcb *nested_vmcb);
+void svm_leave_nested(struct vcpu_svm *svm);
  int nested_svm_vmrun(struct vcpu_svm *svm);
  void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb);
  int nested_svm_vmexit(struct vcpu_svm *svm);


Re: [PATCH 20/30] KVM: SVM: preserve VGIF across VMCB switch

2020-05-31 Thread Krish Sadhukhan



On 5/29/20 8:39 AM, Paolo Bonzini wrote:

There is only one GIF flag for the whole processor, so make sure it is not 
clobbered
when switching to L2 (in which case we also have to include the 
V_GIF_ENABLE_MASK,
lest we confuse enable_gif/disable_gif/gif_set).  When going back, L1 could in
theory have entered L2 without issuing a CLGI so make sure the svm_set_gif is
done last, after svm->vmcb->control.int_ctl has been copied back from hsave.

Signed-off-by: Paolo Bonzini 
---
  arch/x86/kvm/svm/nested.c | 6 +-
  1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 7e4a506828c9..6c7f0bffdf01 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -293,6 +293,7 @@ static void nested_prepare_vmcb_save(struct vcpu_svm *svm, 
struct vmcb *nested_v
  
  static void nested_prepare_vmcb_control(struct vcpu_svm *svm)

  {
+   const u32 mask = V_INTR_MASKING_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK;
if (svm->nested.ctl.nested_ctl & SVM_NESTED_CTL_NP_ENABLE)
		nested_svm_init_mmu_context(&svm->vcpu);
  
@@ -308,7 +309,10 @@ static void nested_prepare_vmcb_control(struct vcpu_svm *svm)

svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset =
svm->vcpu.arch.l1_tsc_offset + svm->nested.ctl.tsc_offset;
  
-	svm->vmcb->control.int_ctl = svm->nested.ctl.int_ctl | V_INTR_MASKING_MASK;

+   svm->vmcb->control.int_ctl =
+   (svm->nested.ctl.int_ctl & ~mask) |
+   (svm->nested.hsave->control.int_ctl & mask);



If this is the very first VMRUN, do we have any int_ctl saved in hsave ?


+
svm->vmcb->control.virt_ext= svm->nested.ctl.virt_ext;
svm->vmcb->control.int_vector  = svm->nested.ctl.int_vector;
svm->vmcb->control.int_state   = svm->nested.ctl.int_state;


Re: [PATCH 17/30] KVM: nSVM: synchronize VMCB controls updated by the processor on every vmexit

2020-05-29 Thread Krish Sadhukhan



On 5/29/20 8:39 AM, Paolo Bonzini wrote:

The control state changes on every L2->L0 vmexit, and we will have to
serialize it in the nested state.  So keep it up to date in svm->nested.ctl
and just copy them back to the nested VMCB in nested_svm_vmexit.

Signed-off-by: Paolo Bonzini 
---
  arch/x86/kvm/svm/nested.c | 57 ++-
  arch/x86/kvm/svm/svm.c|  5 +++-
  arch/x86/kvm/svm/svm.h|  1 +
  3 files changed, 38 insertions(+), 25 deletions(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 1e5f460b5540..921466eba556 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -234,6 +234,34 @@ static void load_nested_vmcb_control(struct vcpu_svm *svm,
svm->nested.ctl.iopm_base_pa  &= ~0x0fffULL;
  }
  
+/*

+ * Synchronize fields that are written by the processor, so that
+ * they can be copied back into the nested_vmcb.
+ */
+void sync_nested_vmcb_control(struct vcpu_svm *svm)
+{
+   u32 mask;
+   svm->nested.ctl.event_inj  = svm->vmcb->control.event_inj;
+   svm->nested.ctl.event_inj_err  = svm->vmcb->control.event_inj_err;
+
+   /* Only a few fields of int_ctl are written by the processor.  */
+   mask = V_IRQ_MASK | V_TPR_MASK;
+   if (!(svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) &&
+   is_intercept(svm, SVM_EXIT_VINTR)) {
+   /*
+* In order to request an interrupt window, L0 is usurping
+* svm->vmcb->control.int_ctl and possibly setting V_IRQ
+* even if it was clear in L1's VMCB.  Restoring it would be
+* wrong.  However, in this case V_IRQ will remain true until
+* interrupt_window_interception calls svm_clear_vintr and
+* restores int_ctl.  We can just leave it aside.
+*/
+   mask &= ~V_IRQ_MASK;
+   }
+   svm->nested.ctl.int_ctl&= ~mask;
+   svm->nested.ctl.int_ctl|= svm->vmcb->control.int_ctl & mask;
+}
+
  static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb 
*nested_vmcb)
  {
/* Load the nested guest state */
@@ -471,6 +499,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
/* Exit Guest-Mode */
	leave_guest_mode(&svm->vcpu);
svm->nested.vmcb = 0;
+   WARN_ON_ONCE(svm->nested.nested_run_pending);
  
  	/* in case we halted in L2 */

svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -497,8 +526,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
nested_vmcb->save.dr6= svm->vcpu.arch.dr6;
nested_vmcb->save.cpl= vmcb->save.cpl;
  
-	nested_vmcb->control.int_ctl   = vmcb->control.int_ctl;

-   nested_vmcb->control.int_vector= vmcb->control.int_vector;



While it's not related to this patch, I am wondering if we need the 
following line in enter_svm_guest_mode():


    svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;


If int_vector is used only to trigger a #VMEXIT after we have decided to 
inject a virtual interrupt, we probably don't need the vector value from the
nested vmcb.



nested_vmcb->control.int_state = vmcb->control.int_state;
nested_vmcb->control.exit_code = vmcb->control.exit_code;
nested_vmcb->control.exit_code_hi  = vmcb->control.exit_code_hi;
@@ -510,34 +537,16 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
if (svm->nrips_enabled)
nested_vmcb->control.next_rip  = vmcb->control.next_rip;
  
-	/*

-* If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
-* to make sure that we do not lose injected events. So check event_inj
-* here and copy it to exit_int_info if it is valid.
-* Exit_int_info and event_inj can't be both valid because the case
-* below only happens on a VMRUN instruction intercept which has
-* no valid exit_int_info set.
-*/
-   if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
-		struct vmcb_control_area *nc = &nested_vmcb->control;
-
-   nc->exit_int_info = vmcb->control.event_inj;
-   nc->exit_int_info_err = vmcb->control.event_inj_err;
-   }
-
-   nested_vmcb->control.tlb_ctl   = 0;
-   nested_vmcb->control.event_inj = 0;
-   nested_vmcb->control.event_inj_err = 0;
+   nested_vmcb->control.int_ctl   = svm->nested.ctl.int_ctl;
+   nested_vmcb->control.tlb_ctl   = svm->nested.ctl.tlb_ctl;
+   nested_vmcb->control.event_inj = svm->nested.ctl.event_inj;
+   nested_vmcb->control.event_inj_err = svm->nested.ctl.event_inj_err;
  
  	nested_vmcb->control.pause_filter_count =

svm->vmcb->control.pause_filter_count;
nested_vmcb->control.pause_filter_thresh =
svm->vmcb->control.pause_filter_thresh;
  
-	/* We always set V_INTR_MASKING and remember the old value in hflags */

-   if 

Re: [PATCH 08/30] KVM: nSVM: move map argument out of enter_svm_guest_mode

2020-05-29 Thread Krish Sadhukhan



On 5/29/20 12:04 PM, Paolo Bonzini wrote:

On 29/05/20 20:10, Krish Sadhukhan wrote:

Unmapping the nested VMCB in enter_svm_guest_mode is a bit of a wart,
since the map is not used elsewhere in the function.  There are
just two calls, so move it there.

The last sentence sounds a bit incomplete.

Good point---more precisely, "calls" should be "callers".  "It" refers
to "unmapping".


Also, does it make sense to mention the reason why unmapping is not
required before we enter guest mode ?

Unmapping is a host operation and is not visible by the guest; is this
what you mean?



Yes, I was thinking if we could mention it in the commit message...


-Krish



Paolo



Re: [PATCH 10/30] KVM: nSVM: extract preparation of VMCB for nested run

2020-05-29 Thread Krish Sadhukhan



On 5/29/20 8:39 AM, Paolo Bonzini wrote:

Split out filling svm->vmcb.save and svm->vmcb.control before VMRUN.
Only the latter will be useful when restoring nested SVM state.

This patch introduces no semantic change, so the MMU setup is still
done in nested_prepare_vmcb_save.  The next patch will clean up things.

Signed-off-by: Paolo Bonzini 
---
  arch/x86/kvm/svm/nested.c | 40 +++
  1 file changed, 24 insertions(+), 16 deletions(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index fc0c6d1678eb..73be7af79453 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -245,21 +245,8 @@ static void load_nested_vmcb_control(struct vcpu_svm *svm,
svm->vcpu.arch.tsc_offset += control->tsc_offset;
  }
  
-void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,

- struct vmcb *nested_vmcb)
+static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb 
*nested_vmcb)



Not a big deal, but I feel that it helps a lot in readability if we keep 
the names symmetric. This one could be named prepare_nested_vmcb_save to 
match load_nested_vmcb_control that you created in the previous patch. 
Or load_nested_vmcb_control could be renamed to nested_load_vmcb_control 
to match the name here.



  {
-   bool evaluate_pending_interrupts =
-   is_intercept(svm, INTERCEPT_VINTR) ||
-   is_intercept(svm, INTERCEPT_IRET);
-
-   svm->nested.vmcb = vmcb_gpa;
-   if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
-   svm->vcpu.arch.hflags |= HF_HIF_MASK;
-   else
-   svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
-
-   load_nested_vmcb_control(svm, &nested_vmcb->control);
-
if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE)
		nested_svm_init_mmu_context(&svm->vcpu);
  
@@ -291,7 +278,10 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,

svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
svm->vcpu.arch.dr6  = nested_vmcb->save.dr6;
svm->vmcb->save.cpl = nested_vmcb->save.cpl;
+}
  
+static void nested_prepare_vmcb_control(struct vcpu_svm *svm, struct vmcb *nested_vmcb)

+{
	svm_flush_tlb(&svm->vcpu);
if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
svm->vcpu.arch.hflags |= HF_VINTR_MASK;
@@ -321,6 +311,26 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 
vmcb_gpa,
 */
recalc_intercepts(svm);
  
+	mark_all_dirty(svm->vmcb);

+}
+
+void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
+ struct vmcb *nested_vmcb)
+{
+   bool evaluate_pending_interrupts =
+   is_intercept(svm, INTERCEPT_VINTR) ||
+   is_intercept(svm, INTERCEPT_IRET);
+
+   svm->nested.vmcb = vmcb_gpa;
+   if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
+   svm->vcpu.arch.hflags |= HF_HIF_MASK;
+   else
+   svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
+
+   load_nested_vmcb_control(svm, &nested_vmcb->control);
+   nested_prepare_vmcb_save(svm, nested_vmcb);
+   nested_prepare_vmcb_control(svm, nested_vmcb);
+
/*
 * If L1 had a pending IRQ/NMI before executing VMRUN,
 * which wasn't delivered because it was disallowed (e.g.
@@ -336,8 +346,6 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 
vmcb_gpa,
enable_gif(svm);
if (unlikely(evaluate_pending_interrupts))
		kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
-
-   mark_all_dirty(svm->vmcb);
  }
  
  int nested_svm_vmrun(struct vcpu_svm *svm)


Re: [PATCH 08/30] KVM: nSVM: move map argument out of enter_svm_guest_mode

2020-05-29 Thread Krish Sadhukhan



On 5/29/20 8:39 AM, Paolo Bonzini wrote:

Unmapping the nested VMCB in enter_svm_guest_mode is a bit of a wart,
since the map is not used elsewhere in the function.  There are
just two calls, so move it there.



The last sentence sounds a bit incomplete.

Also, does it make sense to mention the reason why unmapping is not 
required before we enter guest mode ?




Signed-off-by: Paolo Bonzini 
---
  arch/x86/kvm/svm/nested.c | 14 ++
  arch/x86/kvm/svm/svm.c|  3 ++-
  arch/x86/kvm/svm/svm.h|  2 +-
  3 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 8756c9f463fd..8e98d5e420a5 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -229,7 +229,7 @@ static bool nested_vmcb_checks(struct vmcb *vmcb)
  }
  
  void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,

- struct vmcb *nested_vmcb, struct kvm_host_map *map)
+ struct vmcb *nested_vmcb)
  {
bool evaluate_pending_interrupts =
is_intercept(svm, INTERCEPT_VINTR) ||
@@ -304,8 +304,6 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 
vmcb_gpa,
svm->vmcb->control.pause_filter_thresh =
nested_vmcb->control.pause_filter_thresh;
  
-	kvm_vcpu_unmap(&svm->vcpu, map, true);

-
/* Enter Guest-Mode */
	enter_guest_mode(&svm->vcpu);
  
@@ -368,10 +366,7 @@ int nested_svm_vmrun(struct vcpu_svm *svm)

nested_vmcb->control.exit_code_hi = 0;
nested_vmcb->control.exit_info_1  = 0;
nested_vmcb->control.exit_info_2  = 0;
-
-   kvm_vcpu_unmap(&svm->vcpu, &map, true);
-
-   return ret;
+   goto out;
}
  
  	trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa,

@@ -414,7 +409,7 @@ int nested_svm_vmrun(struct vcpu_svm *svm)
copy_vmcb_control_area(hsave, vmcb);
  
  	svm->nested.nested_run_pending = 1;

-   enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, &map);
+   enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb);
  
  	if (!nested_svm_vmrun_msrpm(svm)) {

svm->vmcb->control.exit_code= SVM_EXIT_ERR;
@@ -425,6 +420,9 @@ int nested_svm_vmrun(struct vcpu_svm *svm)
nested_svm_vmexit(svm);
}
  
+out:

+   kvm_vcpu_unmap(&svm->vcpu, &map, true);
+
return ret;
  }
  
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c

index feb96a410f2d..76b3f553815e 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3814,7 +3814,8 @@ static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const 
char *smstate)
	if (kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb), &map) == -EINVAL)
return 1;
nested_vmcb = map.hva;
-   enter_svm_guest_mode(svm, vmcb, nested_vmcb, &map);
+   enter_svm_guest_mode(svm, vmcb, nested_vmcb);
+   kvm_vcpu_unmap(&svm->vcpu, &map, true);
}
return 0;
  }
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 89fab75dd4f5..33e3f09d7a8e 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -395,7 +395,7 @@ static inline bool nested_exit_on_nmi(struct vcpu_svm *svm)
  }
  
  void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,

- struct vmcb *nested_vmcb, struct kvm_host_map *map);
+ struct vmcb *nested_vmcb);
  int nested_svm_vmrun(struct vcpu_svm *svm);
  void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb);
  int nested_svm_vmexit(struct vcpu_svm *svm);


Re: [PATCH 06/30] KVM: SVM: always update CR3 in VMCB

2020-05-29 Thread Krish Sadhukhan



On 5/29/20 8:39 AM, Paolo Bonzini wrote:

svm_load_mmu_pgd is delaying the write of GUEST_CR3 to prepare_vmcs02


Did you mean to say enter_svm_guest_mode here ?

as
an optimization, but this is only correct before the nested vmentry.
If userspace is modifying CR3 with KVM_SET_SREGS after the VM has
already been put in guest mode, the value of CR3 will not be updated.
Remove the optimization, which almost never triggers anyway.
This was added in commit 689f3bf21628 ("KVM: x86: unify callbacks
to load paging root", 2020-03-16) just to keep the two vendor-specific
modules closer, but we'll fix VMX too.

Fixes: 689f3bf21628 ("KVM: x86: unify callbacks to load paging root")
Signed-off-by: Paolo Bonzini 
---
  arch/x86/kvm/svm/nested.c |  6 +-
  arch/x86/kvm/svm/svm.c| 16 +---
  2 files changed, 6 insertions(+), 16 deletions(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index dcac4c3510ab..8756c9f463fd 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -256,11 +256,7 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 
vmcb_gpa,
	svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
	svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
	svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
-   if (npt_enabled) {
-   svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
-   svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
-   } else
-   (void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
+   (void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
  
  	/* Guest paging mode is active - reset mmu */

	kvm_mmu_reset_context(&svm->vcpu);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 545f63ebc720..feb96a410f2d 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3447,7 +3447,6 @@ static fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
  static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long root)
  {
struct vcpu_svm *svm = to_svm(vcpu);
-   bool update_guest_cr3 = true;
unsigned long cr3;
  
  	cr3 = __sme_set(root);

@@ -3456,18 +3455,13 @@ static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, 
unsigned long root)
mark_dirty(svm->vmcb, VMCB_NPT);
  
  		/* Loading L2's CR3 is handled by enter_svm_guest_mode.  */

-   if (is_guest_mode(vcpu))
-   update_guest_cr3 = false;
-   else if (test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
-   cr3 = vcpu->arch.cr3;
-   else /* CR3 is already up-to-date.  */
-   update_guest_cr3 = false;
+   if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
+   return;
+   cr3 = vcpu->arch.cr3;
}
  
-	if (update_guest_cr3) {

-   svm->vmcb->save.cr3 = cr3;
-   mark_dirty(svm->vmcb, VMCB_CR);
-   }
+   svm->vmcb->save.cr3 = cr3;
+   mark_dirty(svm->vmcb, VMCB_CR);
  }
  
  static int is_disabled(void)


Re: [PATCH 05/28] KVM: nSVM: correctly inject INIT vmexits

2020-05-29 Thread Krish Sadhukhan



On 5/26/20 10:22 AM, Paolo Bonzini wrote:

The usual drill at this point, except there is no code to remove because this
case was not handled at all.

Signed-off-by: Paolo Bonzini 
---
  arch/x86/kvm/svm/nested.c | 27 +++
  1 file changed, 27 insertions(+)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index bbf991cfe24b..166b88fc9509 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -25,6 +25,7 @@
  #include "trace.h"
  #include "mmu.h"
  #include "x86.h"
+#include "lapic.h"
  #include "svm.h"
  
  static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,

@@ -788,11 +789,37 @@ static void nested_svm_intr(struct vcpu_svm *svm)
nested_svm_vmexit(svm);
  }
  
+static inline bool nested_exit_on_init(struct vcpu_svm *svm)

+{
+   return (svm->nested.intercept & (1ULL << INTERCEPT_INIT));
+}
+
+static void nested_svm_init(struct vcpu_svm *svm)


Should this be named nested_svm_inject_init_vmexit in accordance with 
nested_svm_inject_exception_vmexit that you did in patch# 3 ?



+{
+   svm->vmcb->control.exit_code   = SVM_EXIT_INIT;
+   svm->vmcb->control.exit_info_1 = 0;
+   svm->vmcb->control.exit_info_2 = 0;
+
+   nested_svm_vmexit(svm);
+}
+
+
  static int svm_check_nested_events(struct kvm_vcpu *vcpu)
  {
struct vcpu_svm *svm = to_svm(vcpu);
bool block_nested_events =
kvm_event_needs_reinjection(vcpu) || 
svm->nested.nested_run_pending;
+   struct kvm_lapic *apic = vcpu->arch.apic;
+
+   if (lapic_in_kernel(vcpu) &&
+   test_bit(KVM_APIC_INIT, &apic->pending_events)) {
+   if (block_nested_events)
+   return -EBUSY;
+   if (!nested_exit_on_init(svm))
+   return 0;
+   nested_svm_init(svm);
+   return 0;
+   }
  
  	if (vcpu->arch.exception.pending) {

if (block_nested_events)


Re: [PATCH 02/28] KVM: x86: enable event window in inject_pending_event

2020-05-28 Thread Krish Sadhukhan



On 5/26/20 10:22 AM, Paolo Bonzini wrote:

In case an interrupt arrives after nested.check_events but before the
call to kvm_cpu_has_injectable_intr, we could end up enabling the interrupt
window even if the interrupt is actually going to be a vmexit.  This is
useless rather than harmful, but it really complicates reasoning about
SVM's handling of the VINTR intercept.  We'd like to never bother with
the VINTR intercept if V_INTR_MASKING=1 && INTERCEPT_INTR=1, because in
that case there is no interrupt window and we can just exit the nested
guest whenever we want.

As a first step, this patch moves the opening of the interrupt
window inside inject_pending_event.  This consolidates the check for
pending interrupt/NMI/SMI in one place, removing the repeated call to
kvm_cpu_has_injectable_intr.

The main functional change here is that re-injection of still-pending
events will also use req_immediate_exit instead of using interrupt-window
intercepts.

Signed-off-by: Paolo Bonzini 
---
  arch/x86/include/asm/kvm_host.h |   8 +--
  arch/x86/kvm/svm/svm.c  |  24 +++
  arch/x86/kvm/vmx/vmx.c  |  20 +++---
  arch/x86/kvm/x86.c  | 112 +---
  4 files changed, 87 insertions(+), 77 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index db261da578f3..7707bd4b0593 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1136,8 +1136,8 @@ struct kvm_x86_ops {
void (*set_nmi)(struct kvm_vcpu *vcpu);
void (*queue_exception)(struct kvm_vcpu *vcpu);
void (*cancel_injection)(struct kvm_vcpu *vcpu);
-   bool (*interrupt_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
-   bool (*nmi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
+   int (*interrupt_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
+   int (*nmi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
@@ -1234,10 +1234,10 @@ struct kvm_x86_ops {
  
  	void (*setup_mce)(struct kvm_vcpu *vcpu);
  
-	bool (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);

+   int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
int (*pre_leave_smm)(struct kvm_vcpu *vcpu, const char *smstate);
-   int (*enable_smi_window)(struct kvm_vcpu *vcpu);
+   void (*enable_smi_window)(struct kvm_vcpu *vcpu);
  
  	int (*mem_enc_op)(struct kvm *kvm, void __user *argp);

int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 9987f6fe9d88..9ac9963405b5 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3053,15 +3053,15 @@ bool svm_nmi_blocked(struct kvm_vcpu *vcpu)
return ret;
  }
  
-static bool svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)

+static int svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
  {
struct vcpu_svm *svm = to_svm(vcpu);
if (svm->nested.nested_run_pending)
-   return false;
+   return -EBUSY;
  
  	/* An NMI must not be injected into L2 if it's supposed to VM-Exit.  */

if (for_injection && is_guest_mode(vcpu) && nested_exit_on_nmi(svm))
-   return false;
+   return -EBUSY;
  
  	return !svm_nmi_blocked(vcpu);

  }
@@ -3112,18 +3112,18 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
return (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK);
  }
  
-static bool svm_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)

+static int svm_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
  {
struct vcpu_svm *svm = to_svm(vcpu);
if (svm->nested.nested_run_pending)
-   return false;
+   return -EBUSY;
  
  	/*

 * An IRQ must not be injected into L2 if it's supposed to VM-Exit,
 * e.g. if the IRQ arrived asynchronously after checking nested events.
 */
if (for_injection && is_guest_mode(vcpu) && nested_exit_on_intr(svm))
-   return false;
+   return -EBUSY;
  
  	return !svm_interrupt_blocked(vcpu);

  }
@@ -3793,15 +3793,15 @@ bool svm_smi_blocked(struct kvm_vcpu *vcpu)
return is_smm(vcpu);
  }
  
-static bool svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)

+static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
  {
struct vcpu_svm *svm = to_svm(vcpu);
if (svm->nested.nested_run_pending)
-   return false;
+   return -EBUSY;
  
  	/* An SMI must not be injected into L2 if it's supposed to VM-Exit.  */

if (for_injection && is_guest_mode(vcpu) && nested_exit_on_smi(svm))
-   return false;
+  

Re: [PATCH 0/2] Fix issue with not starting nesting guests on my system

2020-05-26 Thread Krish Sadhukhan



On 5/23/20 9:14 AM, Maxim Levitsky wrote:

On my AMD machine I noticed that I can't start any nested guests,
because nested KVM (everything from master git branches) complains
that it can't find msr MSR_IA32_UMWAIT_CONTROL which my system doesn't support
at all anyway.

I traced it to the recently added UMWAIT support in qemu and kvm.
The kvm portion exposed the new MSR in KVM_GET_MSR_INDEX_LIST without
checking that the underlying feature is supported in CPUID.
It happened to work when not nested because, as a precaution, kvm
tries to read each MSR on the host before adding it to that list,
and ignores any MSR whose read takes a #GP.

When running nested, the L1 hypervisor can be set to ignore unknown
msr read/writes (I need this for some other guests), thus this safety
check doesn't work anymore.
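
For illustration, a minimal sketch of the probing idea described above, i.e.
dropping an MSR from the exported list when a fault-safe host read takes a
#GP.  This is simplified and hypothetical, not the actual code; it assumes
the existing msrs_to_save/num_msrs_to_save globals and the rdmsrl_safe()
helper (kvm_init_msr_list() in arch/x86/kvm/x86.c does the real work):

	static void probe_msrs_to_save(void)
	{
		unsigned int i, j = 0;
		u64 dummy;

		for (i = 0; i < num_msrs_to_save; i++) {
			/* rdmsrl_safe() returns non-zero if the read faults (#GP). */
			if (rdmsrl_safe(msrs_to_save[i], &dummy))
				continue;	/* host doesn't implement it, drop it */

			msrs_to_save[j++] = msrs_to_save[i];
		}
		num_msrs_to_save = j;
	}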

V2: * added a patch to setup correctly the X86_FEATURE_WAITPKG kvm capability
 * dropped the cosmetic fix patch as it is now fixed in kvm/queue

Best regards,
Maxim Levitsky

Maxim Levitsky (2):
   kvm/x86/vmx: enable X86_FEATURE_WAITPKG in KVM capabilities
   kvm/x86: don't expose MSR_IA32_UMWAIT_CONTROL unconditionally

  arch/x86/kvm/vmx/vmx.c | 3 +++
  arch/x86/kvm/x86.c | 4 
  2 files changed, 7 insertions(+)


Nit: The added 'break' statement in patch# 2 is not required.

Reviewed-by: Krish Sadhukhan 


Re: [PATCH 0/7] KVM: SVM: baby steps towards nested state migration

2020-05-18 Thread Krish Sadhukhan



On 5/15/20 10:41 AM, Paolo Bonzini wrote:

Here are some refactorings to prepare for an SVM implementation of
KVM_SET_NESTED_STATE.  It's a prerequisite for that to eliminate
exit_required, moving exceptions to svm_check_nested_events.  However:

- I might work on that soon, because it's needed to handle RSM when
the L1 hypervisor wants to get it from #UD rather than the specific
RSM intercept

- this should be enough to get a quick prototype, that I need in order to
debug a particularly crazy bug and figure out its reproducibility.

So, I am getting these patches out of my todo list for now.

Thanks,

Paolo

Paolo Bonzini (7):
   KVM: SVM: move map argument out of enter_svm_guest_mode
   KVM: SVM: extract load_nested_vmcb_control
   KVM: SVM: extract preparation of VMCB for nested run
   KVM: SVM: save all control fields in svm->nested
   KVM: nSVM: remove HF_VINTR_MASK
   KVM: nSVM: do not reload pause filter fields from VMCB
   KVM: SVM: introduce data structures for nested virt state

  arch/x86/include/asm/kvm_host.h |   1 -
  arch/x86/include/uapi/asm/kvm.h |  26 +++-
  arch/x86/kvm/svm/nested.c   | 115 +---
  arch/x86/kvm/svm/svm.c  |  11 ++-
  arch/x86/kvm/svm/svm.h  |  28 +---
  5 files changed, 116 insertions(+), 65 deletions(-)


Reviewed-by: Krish Sadhukhan 


Re: [PATCH 2/7] KVM: SVM: extract load_nested_vmcb_control

2020-05-15 Thread Krish Sadhukhan



On 5/15/20 10:41 AM, Paolo Bonzini wrote:

When restoring SVM nested state, the control state will be stored already
in svm->nested by KVM_SET_NESTED_STATE.  We will not need to fish it out of
L1's VMCB.  Pull everything into a separate function so that it is
documented which fields are needed.

Signed-off-by: Paolo Bonzini 
---
  arch/x86/kvm/svm/nested.c | 45 ++-
  1 file changed, 25 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 22f75f66084f..e79acc852000 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -225,6 +225,27 @@ static bool nested_vmcb_checks(struct vmcb *vmcb)
return true;
  }
  
+static void load_nested_vmcb_control(struct vcpu_svm *svm, struct vmcb *nested_vmcb)



This function only separates a subset of the controls. If the purpose of 
the function is to separate only the controls that are related to 
migration, should it be called something like 
load_nested_state_vmcb_control or something like that ?



+{
+   if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
+   svm->vcpu.arch.hflags |= HF_HIF_MASK;
+   else
+   svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
+
+   svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
+
+   svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & 
~0x0fffULL;
+   svm->nested.vmcb_iopm  = nested_vmcb->control.iopm_base_pa  & 
~0x0fffULL;
+
+   /* cache intercepts */
+   svm->nested.intercept_cr = nested_vmcb->control.intercept_cr;
+   svm->nested.intercept_dr = nested_vmcb->control.intercept_dr;
+   svm->nested.intercept_exceptions = 
nested_vmcb->control.intercept_exceptions;
+   svm->nested.intercept= nested_vmcb->control.intercept;
+
+   svm->vcpu.arch.tsc_offset += nested_vmcb->control.tsc_offset;
+}
+
  void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
  struct vmcb *nested_vmcb)
  {
@@ -232,15 +253,11 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 
vmcb_gpa,
is_intercept(svm, INTERCEPT_VINTR) ||
is_intercept(svm, INTERCEPT_IRET);
  
-	if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)

-   svm->vcpu.arch.hflags |= HF_HIF_MASK;
-   else
-   svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
+   svm->nested.vmcb = vmcb_gpa;
+   load_nested_vmcb_control(svm, nested_vmcb);
  
-	if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) {

-   svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
+   if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE)
		nested_svm_init_mmu_context(&svm->vcpu);
-   }
  
  	/* Load the nested guest state */

svm->vmcb->save.es = nested_vmcb->save.es;
@@ -275,25 +292,15 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 
vmcb_gpa,
svm->vcpu.arch.dr6  = nested_vmcb->save.dr6;
svm->vmcb->save.cpl = nested_vmcb->save.cpl;
  
-	svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL;

-   svm->nested.vmcb_iopm  = nested_vmcb->control.iopm_base_pa  & 
~0x0fffULL;
-
-   /* cache intercepts */
-   svm->nested.intercept_cr = nested_vmcb->control.intercept_cr;
-   svm->nested.intercept_dr = nested_vmcb->control.intercept_dr;
-   svm->nested.intercept_exceptions = 
nested_vmcb->control.intercept_exceptions;
-   svm->nested.intercept= nested_vmcb->control.intercept;
-
	svm_flush_tlb(&svm->vcpu);
-   svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | 
V_INTR_MASKING_MASK;
if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
svm->vcpu.arch.hflags |= HF_VINTR_MASK;
else
svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
  
-	svm->vcpu.arch.tsc_offset += nested_vmcb->control.tsc_offset;

svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset;
  
+	svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;

svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext;
svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
svm->vmcb->control.int_state = nested_vmcb->control.int_state;
@@ -314,8 +321,6 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 
vmcb_gpa,
 */
recalc_intercepts(svm);
  
-	svm->nested.vmcb = vmcb_gpa;

-
/*
 * If L1 had a pending IRQ/NMI before executing VMRUN,
 * which wasn't delivered because it was disallowed (e.g.


Re: [PATCH 1/4] KVM: VMX: rename {vmx,nested_vmx}_vcpu_setup functions

2019-10-16 Thread Krish Sadhukhan



On 10/15/19 6:27 PM, Xiaoyao Li wrote:

On 10/16/2019 6:05 AM, Krish Sadhukhan wrote:



On 10/15/2019 09:40 AM, Xiaoyao Li wrote:

Rename {vmx,nested_vmx}_vcpu_setup to {vmx,nested_vmx}_vmcs_setup,
to match what they really do.

No functional change.

Signed-off-by: Xiaoyao Li 
---
  arch/x86/kvm/vmx/nested.c | 2 +-
  arch/x86/kvm/vmx/nested.h | 2 +-
  arch/x86/kvm/vmx/vmx.c    | 9 +++--
  3 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 5e231da00310..7935422d311f 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -5768,7 +5768,7 @@ static int vmx_set_nested_state(struct 
kvm_vcpu *vcpu,

  return ret;
  }
-void nested_vmx_vcpu_setup(void)
+void nested_vmx_vmcs_setup(void)
  {
  if (enable_shadow_vmcs) {
  vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index 187d39bf0bf1..2be1ba7482c9 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -11,7 +11,7 @@ void nested_vmx_setup_ctls_msrs(struct 
nested_vmx_msrs *msrs, u32 ept_caps,

  bool apicv);
  void nested_vmx_hardware_unsetup(void);
  __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct 
kvm_vcpu *));

-void nested_vmx_vcpu_setup(void);
+void nested_vmx_vmcs_setup(void);
  void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu);
  int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool 
from_vmentry);
  bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 
exit_reason);

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index e660e28e9ae0..58b77a882426 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4161,15 +4161,12 @@ static void ept_set_mmio_spte_mask(void)
  #define VMX_XSS_EXIT_BITMAP 0
-/*
- * Sets up the vmcs for emulated real mode.
- */
-static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
+static void vmx_vmcs_setup(struct vcpu_vmx *vmx)
  {
  int i;
  if (nested)
-    nested_vmx_vcpu_setup();
+    nested_vmx_vmcs_setup();
  if (cpu_has_vmx_msr_bitmap())
  vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
@@ -6777,7 +6774,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct 
kvm *kvm, unsigned int id)

  cpu = get_cpu();
  vmx_vcpu_load(&vmx->vcpu, cpu);
  vmx->vcpu.cpu = cpu;
-    vmx_vcpu_setup(vmx);
+    vmx_vmcs_setup(vmx);
  vmx_vcpu_put(&vmx->vcpu);
  put_cpu();
  if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {


Maybe we should rename vmx_vcpu_reset() to vmx_vmcs_reset() as well ?


Not really. vmx_vcpu_reset() not only resets the vmcs but also the
emulated fields of the vmx vcpu.


It would be a better organization of the code if the resetting of the 
VMCS fields were placed in a separate function.
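
For illustration, a rough sketch of that organization; the helper name and
the exact set of VMCS writes are placeholders, so treat this as a sketch of
the suggestion rather than the actual vmx.c code:

	static void vmx_vmcs_reset(struct vcpu_vmx *vmx)
	{
		/* the vmcs_write*() calls currently done inline in vmx_vcpu_reset() */
	}

	static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
	{
		struct vcpu_vmx *vmx = to_vmx(vcpu);

		/* reset the emulated vCPU state (MSRs, APIC base, ...) first */

		/* then reset all the VMCS fields in one place */
		vmx_vmcs_reset(vmx);
	}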



Reviewed-by: Krish Sadhukhan 



Re: [PATCH 2/4] KVM: VMX: Setup MSR bitmap only when has msr_bitmap capability

2019-10-15 Thread Krish Sadhukhan




On 10/15/2019 09:40 AM, Xiaoyao Li wrote:

Move the MSR bitmap setup codes to vmx_vmcs_setup() and only setup them
when hardware has msr_bitmap capability.

Signed-off-by: Xiaoyao Li 
---
  arch/x86/kvm/vmx/vmx.c | 39 ---
  1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 58b77a882426..7051511c27c2 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4164,12 +4164,30 @@ static void ept_set_mmio_spte_mask(void)
  static void vmx_vmcs_setup(struct vcpu_vmx *vmx)
  {
int i;
+   unsigned long *msr_bitmap;
  
  	if (nested)

nested_vmx_vmcs_setup();
  
-	if (cpu_has_vmx_msr_bitmap())

-   vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
+   if (cpu_has_vmx_msr_bitmap()) {
+   msr_bitmap = vmx->vmcs01.msr_bitmap;
+   vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_TSC, 
MSR_TYPE_R);


vmx_disable_intercept_for_msr() also calls cpu_has_vmx_msr_bitmap(), 
which means we are repeating the check. A cleaner approach is to remove 
the call to cpu_has_vmx_msr_bitmap()  from 
vmx_disable_intercept_for_msr()  and let its callers do the check just 
like you are doing here.
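
For illustration, a sketch of what the helper could look like with the
check dropped, relying on the caller's cpu_has_vmx_msr_bitmap() guard.
The bitmap offsets follow the standard VMX MSR-bitmap layout (read-low
0x000, read-high 0x400, write-low 0x800, write-high 0xc00), but this is
a sketch, not the exact vmx.c code:

	static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
								  u32 msr, int type)
	{
		int f = sizeof(unsigned long);

		/* cpu_has_vmx_msr_bitmap() check removed: callers guarantee it. */
		if (msr <= 0x1fff) {
			if (type & MSR_TYPE_R)
				__clear_bit(msr, msr_bitmap + 0x000 / f);
			if (type & MSR_TYPE_W)
				__clear_bit(msr, msr_bitmap + 0x800 / f);
		} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
			msr &= 0x1fff;
			if (type & MSR_TYPE_R)
				__clear_bit(msr, msr_bitmap + 0x400 / f);
			if (type & MSR_TYPE_W)
				__clear_bit(msr, msr_bitmap + 0xc00 / f);
		}
	}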



+   vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, 
MSR_TYPE_RW);
+   vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, 
MSR_TYPE_RW);
+   vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, 
MSR_TYPE_RW);
+   vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, 
MSR_TYPE_RW);
+   vmx_disable_intercept_for_msr(msr_bitmap, 
MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
+   vmx_disable_intercept_for_msr(msr_bitmap, 
MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
+   if (kvm_cstate_in_guest(vmx->vcpu.kvm)) {
+   vmx_disable_intercept_for_msr(msr_bitmap, 
MSR_CORE_C1_RES, MSR_TYPE_R);
+   vmx_disable_intercept_for_msr(msr_bitmap, 
MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
+   vmx_disable_intercept_for_msr(msr_bitmap, 
MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
+   vmx_disable_intercept_for_msr(msr_bitmap, 
MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
+   }
+
+   vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
+   }
+   vmx->msr_bitmap_mode = 0;
  
  	vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
  
@@ -6697,7 +6715,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)

  {
int err;
struct vcpu_vmx *vmx;
-   unsigned long *msr_bitmap;
int cpu;
  
  	BUILD_BUG_ON_MSG(offsetof(struct vcpu_vmx, vcpu) != 0,

@@ -6754,22 +6771,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, 
unsigned int id)
if (err < 0)
goto free_msrs;
  
-	msr_bitmap = vmx->vmcs01.msr_bitmap;

-   vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_TSC, MSR_TYPE_R);
-   vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
-   vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW);
-   vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, 
MSR_TYPE_RW);
-   vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, 
MSR_TYPE_RW);
-   vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, 
MSR_TYPE_RW);
-   vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, 
MSR_TYPE_RW);
-   if (kvm_cstate_in_guest(kvm)) {
-   vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C1_RES, 
MSR_TYPE_R);
-   vmx_disable_intercept_for_msr(msr_bitmap, 
MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
-   vmx_disable_intercept_for_msr(msr_bitmap, 
MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
-   vmx_disable_intercept_for_msr(msr_bitmap, 
MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
-   }
-   vmx->msr_bitmap_mode = 0;
-
	vmx->loaded_vmcs = &vmx->vmcs01;
	cpu = get_cpu();
	vmx_vcpu_load(&vmx->vcpu, cpu);




Re: [PATCH 1/4] KVM: VMX: rename {vmx,nested_vmx}_vcpu_setup functions

2019-10-15 Thread Krish Sadhukhan




On 10/15/2019 09:40 AM, Xiaoyao Li wrote:

Rename {vmx,nested_vmx}_vcpu_setup to {vmx,nested_vmx}_vmcs_setup,
to match what they really do.

No functional change.

Signed-off-by: Xiaoyao Li 
---
  arch/x86/kvm/vmx/nested.c | 2 +-
  arch/x86/kvm/vmx/nested.h | 2 +-
  arch/x86/kvm/vmx/vmx.c| 9 +++--
  3 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 5e231da00310..7935422d311f 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -5768,7 +5768,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
return ret;
  }
  
-void nested_vmx_vcpu_setup(void)

+void nested_vmx_vmcs_setup(void)
  {
if (enable_shadow_vmcs) {
vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index 187d39bf0bf1..2be1ba7482c9 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -11,7 +11,7 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, 
u32 ept_caps,
bool apicv);
  void nested_vmx_hardware_unsetup(void);
  __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu 
*));
-void nested_vmx_vcpu_setup(void);
+void nested_vmx_vmcs_setup(void);
  void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu);
  int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry);
  bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index e660e28e9ae0..58b77a882426 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4161,15 +4161,12 @@ static void ept_set_mmio_spte_mask(void)
  
  #define VMX_XSS_EXIT_BITMAP 0
  
-/*

- * Sets up the vmcs for emulated real mode.
- */
-static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
+static void vmx_vmcs_setup(struct vcpu_vmx *vmx)
  {
int i;
  
  	if (nested)

-   nested_vmx_vcpu_setup();
+   nested_vmx_vmcs_setup();
  
  	if (cpu_has_vmx_msr_bitmap())

vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
@@ -6777,7 +6774,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, 
unsigned int id)
cpu = get_cpu();
	vmx_vcpu_load(&vmx->vcpu, cpu);
	vmx->vcpu.cpu = cpu;
-   vmx_vcpu_setup(vmx);
+   vmx_vmcs_setup(vmx);
	vmx_vcpu_put(&vmx->vcpu);
	put_cpu();
	if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {


May be we should rename vmx_vcpu_reset() to vmx_vmcs_reset()  as well  ?


Re: [PATCH] KVM: nVMX: cleanup and fix host 64-bit mode checks

2019-09-25 Thread Krish Sadhukhan




On 09/25/2019 09:47 AM, Jim Mattson wrote:

On Wed, Sep 25, 2019 at 9:34 AM Paolo Bonzini  wrote:

KVM was incorrectly checking vmcs12->host_ia32_efer even if the "load
IA32_EFER" exit control was reset.  Also, some checks were not using
the new CC macro for tracing.

Cleanup everything so that the vCPU's 64-bit mode is determined
directly from EFER_LMA and the VMCS checks are based on that, which
matches section 26.2.4 of the SDM.

Cc: Sean Christopherson 
Cc: Jim Mattson 
Cc: Krish Sadhukhan 
Fixes: 5845038c111db27902bc220a4f70070fe945871c
Signed-off-by: Paolo Bonzini 
---
  arch/x86/kvm/vmx/nested.c | 53 ---
  1 file changed, 22 insertions(+), 31 deletions(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 70d59d9304f2..e108847f6cf8 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2664,8 +2664,26 @@ static int nested_vmx_check_host_state(struct kvm_vcpu 
*vcpu,
 CC(!kvm_pat_valid(vmcs12->host_ia32_pat)))
 return -EINVAL;

-   ia32e = (vmcs12->vm_exit_controls &
-VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0;
+#ifdef CONFIG_X86_64
+   ia32e = !!(vcpu->arch.efer & EFER_LMA);
+#else
+   if (CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE))
+   return -EINVAL;

This check is redundant, since it is checked in the else block below.


Should we be re-using is_long_mode() instead of duplicating the code ?
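
For illustration, since is_long_mode() already reads EFER_LMA (and compiles
to false when CONFIG_X86_64 is not set), the assignment could collapse to a
single line, something like the sketch below:

	ia32e = is_long_mode(vcpu);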




+
+   ia32e = false;
+#endif
+
+   if (ia32e) {
+   if (CC(!(vmcs12->vm_exit_controls & 
VM_EXIT_HOST_ADDR_SPACE_SIZE)) ||
+   CC(!(vmcs12->host_cr4 & X86_CR4_PAE)))
+   return -EINVAL;
+   } else {
+   if (CC(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) 
||
+   CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) ||
+   CC(vmcs12->host_cr4 & X86_CR4_PCIDE) ||
+   CC(((vmcs12->host_rip) >> 32) & 0xffffffff))

The mask shouldn't be necessary.


+   return -EINVAL;
+   }

 if (CC(vmcs12->host_cs_selector & (SEGMENT_RPL_MASK | 
SEGMENT_TI_MASK)) ||
 CC(vmcs12->host_ss_selector & (SEGMENT_RPL_MASK | 
SEGMENT_TI_MASK)) ||
@@ -2684,35 +2702,8 @@ static int nested_vmx_check_host_state(struct kvm_vcpu 
*vcpu,
 CC(is_noncanonical_address(vmcs12->host_gs_base, vcpu)) ||
 CC(is_noncanonical_address(vmcs12->host_gdtr_base, vcpu)) ||
 CC(is_noncanonical_address(vmcs12->host_idtr_base, vcpu)) ||
-   CC(is_noncanonical_address(vmcs12->host_tr_base, vcpu)))
-   return -EINVAL;
-
-   if (!(vmcs12->host_ia32_efer & EFER_LMA) &&
-   ((vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) ||
-   (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE))) {
-   return -EINVAL;
-   }
-
-   if ((vmcs12->host_ia32_efer & EFER_LMA) &&
-   !(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)) {
-   return -EINVAL;
-   }
-
-   if (!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) &&
-   ((vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) ||
-   (vmcs12->host_cr4 & X86_CR4_PCIDE) ||
-   (((vmcs12->host_rip) >> 32) & 0xffffffff))) {
-   return -EINVAL;
-   }
-
-   if ((vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) &&
-   ((!(vmcs12->host_cr4 & X86_CR4_PAE)) ||
-   (is_noncanonical_address(vmcs12->host_rip, vcpu)))) {
-   return -EINVAL;
-   }
-#else
-   if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE ||
-   vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
+   CC(is_noncanonical_address(vmcs12->host_tr_base, vcpu)) ||
+   CC(is_noncanonical_address(vmcs12->host_rip, vcpu)))
 return -EINVAL;
  #endif

--
1.8.3.1


Reviewed-by: Jim Mattson 




Re: [PATCH v2] KVM: x86: Unconditionally call x86 ops that are always implemented

2019-08-02 Thread Krish Sadhukhan




On 08/02/2019 03:06 PM, Sean Christopherson wrote:

Remove a few stale checks for non-NULL ops now that the ops in question
are implemented by both VMX and SVM.

Note, this is **not** stable material, the Fixes tags are there purely
to show when a particular op was first supported by both VMX and SVM.

Fixes: 74f169090b6f ("kvm/svm: Setup MCG_CAP on AMD properly")
Fixes: b31c114b82b2 ("KVM: X86: Provide a capability to disable PAUSE 
intercepts")
Fixes: 411b44ba80ab ("svm: Implements update_pi_irte hook to setup posted 
interrupt")
Cc: Krish Sadhukhan 
Signed-off-by: Sean Christopherson 
---

v2: Give update_pi_iret the same treatment [Krish].

  arch/x86/kvm/x86.c | 13 +++--
  1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 01e18caac825..e7c993f0cbed 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3506,8 +3506,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu 
*vcpu,
for (bank = 0; bank < bank_num; bank++)
vcpu->arch.mce_banks[bank*4] = ~(u64)0;
  
-	if (kvm_x86_ops->setup_mce)

-   kvm_x86_ops->setup_mce(vcpu);
+   kvm_x86_ops->setup_mce(vcpu);
  out:
return r;
  }
@@ -9313,10 +9312,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm_page_track_init(kvm);
kvm_mmu_init_vm(kvm);
  
-	if (kvm_x86_ops->vm_init)

-   return kvm_x86_ops->vm_init(kvm);
-
-   return 0;
+   return kvm_x86_ops->vm_init(kvm);
  }
  
  static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)

@@ -9992,7 +9988,7 @@ EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
  
  bool kvm_arch_has_irq_bypass(void)


Now that this always returns true and is called only in
kvm_irqfd_assign(), can this perhaps be removed altogether ?



  {
-   return kvm_x86_ops->update_pi_irte != NULL;
+   return true;
  }
  
  int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,

@@ -10032,9 +10028,6 @@ void kvm_arch_irq_bypass_del_producer(struct 
irq_bypass_consumer *cons,
  int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
   uint32_t guest_irq, bool set)
  {
-   if (!kvm_x86_ops->update_pi_irte)
-   return -EINVAL;
-
return kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set);
  }
  


Reviewed-by: Krish Sadhukhan 


Re: [PATCH] KVM: x86: Unconditionally call x86 ops that are always implemented

2019-08-01 Thread Krish Sadhukhan




On 08/01/2019 09:46 AM, Sean Christopherson wrote:

Remove two stale checks for non-NULL ops now that they're implemented by
both VMX and SVM.

Fixes: 74f169090b6f ("kvm/svm: Setup MCG_CAP on AMD properly")
Fixes: b31c114b82b2 ("KVM: X86: Provide a capability to disable PAUSE 
intercepts")
Signed-off-by: Sean Christopherson 
---
  arch/x86/kvm/x86.c | 8 ++--
  1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 01e18caac825..2c25a19d436f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3506,8 +3506,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu 
*vcpu,
for (bank = 0; bank < bank_num; bank++)
vcpu->arch.mce_banks[bank*4] = ~(u64)0;
  
-	if (kvm_x86_ops->setup_mce)

-   kvm_x86_ops->setup_mce(vcpu);
+   kvm_x86_ops->setup_mce(vcpu);
  out:
return r;
  }
@@ -9313,10 +9312,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm_page_track_init(kvm);
kvm_mmu_init_vm(kvm);
  
-	if (kvm_x86_ops->vm_init)

-   return kvm_x86_ops->vm_init(kvm);
-
-   return 0;
+   return kvm_x86_ops->vm_init(kvm);
  }
  
  static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)


The following two ops are also implemented by both VMX and SVM:

        update_cr8_intercept
        update_pi_irte


Re: [PATCH] KVM: x86: init x2apic_enabled() once

2019-07-23 Thread Krish Sadhukhan




On 07/23/2019 06:06 AM, lufe...@163.com wrote:

From: luferry 

x2apic_enabled() costs about 200 cycles. When the guest triggers halt
frequently, the posted-interrupt ops are on the hot path.

Signed-off-by: luferry 
---
  arch/x86/kvm/vmx/vmx.c | 10 +++---
  1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index d98eac371c0a..e17dbf011e47 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -186,6 +186,8 @@ static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush);
  static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond);
  static DEFINE_MUTEX(vmx_l1d_flush_mutex);
  
+static int __read_mostly host_x2apic_enabled;

+
  /* Storage for pre module init parameter parsing */
  static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush_param = 
VMENTER_L1D_FLUSH_AUTO;
  
@@ -1204,7 +1206,7 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
  
  		dest = cpu_physical_id(cpu);
  
-		if (x2apic_enabled())

+   if (host_x2apic_enabled)
new.ndst = dest;
else
new.ndst = (dest << 8) & 0xFF00;
@@ -7151,7 +7153,7 @@ static void __pi_post_block(struct kvm_vcpu *vcpu)
  
  		dest = cpu_physical_id(vcpu->cpu);
  
-		if (x2apic_enabled())

+   if (host_x2apic_enabled)
new.ndst = dest;
else
new.ndst = (dest << 8) & 0xFF00;
@@ -7221,7 +7223,7 @@ static int pi_pre_block(struct kvm_vcpu *vcpu)
 */
dest = cpu_physical_id(vcpu->pre_pcpu);
  
-		if (x2apic_enabled())

+   if (host_x2apic_enabled)
new.ndst = dest;
else
new.ndst = (dest << 8) & 0xFF00;
@@ -7804,6 +7806,8 @@ static int __init vmx_init(void)
}
  #endif
  
+	host_x2apic_enabled = x2apic_enabled();

+
	r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
 __alignof__(struct vcpu_vmx), THIS_MODULE);
if (r)


Reviewed-by: Krish Sadhukhan 


Re: [PATCH] KVM: nVMX: Rename prepare_vmcs02_*_full to prepare_vmcs02_*_extra

2019-06-06 Thread Krish Sadhukhan




On 06/06/2019 11:41 AM, Sean Christopherson wrote:

On Thu, Jun 06, 2019 at 05:24:12PM +0200, Paolo Bonzini wrote:

These function do not prepare the entire state of the vmcs02, only the
rarely needed parts.  Rename them to make this clearer.

Signed-off-by: Paolo Bonzini 
---
  arch/x86/kvm/vmx/nested.c | 8 
  1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 84438cf23d37..fd8150ef6cce 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -1955,7 +1955,7 @@ static void prepare_vmcs02_constant_state(struct vcpu_vmx 
*vmx)
vmx_set_constant_host_state(vmx);
  }
  
-static void prepare_vmcs02_early_full(struct vcpu_vmx *vmx,

+static void prepare_vmcs02_early_extra(struct vcpu_vmx *vmx,

Or maybe 'uncommon', 'rare' or 'ext'?  I don't particularly love any of
the names, but they're all better than 'full'.


The big chunk of the work in this function is done via 
prepare_vmcs02_constant_state(). It seems cleaner to get rid of 
prepare_vmcs02_early_full(), call prepare_vmcs02_constant_state() 
directly from prepare_vmcs02_early() and move the three vmcs_write16() 
calls to prepare_vmcs02_early().
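
For illustration, a rough sketch of that alternative; the body is
abbreviated and the comment stands in for the three vmcs_write16() calls
currently in prepare_vmcs02_early_full(), so this is a sketch rather than
the actual nested.c code:

	static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
	{
		if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs) {
			prepare_vmcs02_constant_state(vmx);
			/* the three vmcs_write16() calls from _early_full() go here */
		}

		/* ... the rest of prepare_vmcs02_early() is unchanged ... */
	}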




Reviewed-by: Sean Christopherson 


  struct vmcs12 *vmcs12)
  {
prepare_vmcs02_constant_state(vmx);
@@ -1976,7 +1976,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, 
struct vmcs12 *vmcs12)
u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12);
  
  	if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs)

-   prepare_vmcs02_early_full(vmx, vmcs12);
+   prepare_vmcs02_early_extra(vmx, vmcs12);
  
  	/*

 * PIN CONTROLS
@@ -2130,7 +2130,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, 
struct vmcs12 *vmcs12)
}
  }
  
-static void prepare_vmcs02_full(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)

+static void prepare_vmcs02_extra(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
  {
struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
  
@@ -2254,7 +2254,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,

struct vcpu_vmx *vmx = to_vmx(vcpu);
  
  	if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs) {

-   prepare_vmcs02_full(vmx, vmcs12);
+   prepare_vmcs02_extra(vmx, vmcs12);
vmx->nested.dirty_vmcs12 = false;
}
  
--

1.8.3.1





Re: [PATCH] KVM: nVMX: allow tests to use bad virtual-APIC page address

2019-04-19 Thread Krish Sadhukhan




On 04/15/2019 06:35 AM, Paolo Bonzini wrote:

As mentioned in the comment, there are some special cases where we can simply
clear the TPR shadow bit from the CPU-based execution controls in the vmcs02.
Handle them so that we can remove some XFAILs from vmx.flat.

Signed-off-by: Paolo Bonzini 
---
  arch/x86/kvm/vmx/nested.c | 25 -
  arch/x86/kvm/vmx/vmx.c|  2 +-
  arch/x86/kvm/vmx/vmx.h|  2 ++
  3 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 7ec9bb1dd723..a22af5a85540 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2873,20 +2873,27 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu 
*vcpu)
/*
 * If translation failed, VM entry will fail because
 * prepare_vmcs02 set VIRTUAL_APIC_PAGE_ADDR to -1ull.
-* Failing the vm entry is _not_ what the processor
-* does but it's basically the only possibility we
-* have.  We could still enter the guest if CR8 load
-* exits are enabled, CR8 store exits are enabled, and
-* virtualize APIC access is disabled; in this case
-* the processor would never use the TPR shadow and we
-* could simply clear the bit from the execution
-* control.  But such a configuration is useless, so
-* let's keep the code simple.
 */
if (!is_error_page(page)) {
vmx->nested.virtual_apic_page = page;
hpa = page_to_phys(vmx->nested.virtual_apic_page);
vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, hpa);
+   } else if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING) &&
+  nested_cpu_has(vmcs12, CPU_BASED_CR8_STORE_EXITING) 
&&
+  !nested_cpu_has2(vmcs12, 
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
+   /*
+* The processor will never use the TPR shadow, simply
+* clear the bit from the execution control.  Such a
+* configuration is useless, but it happens in tests.
+* For any other configuration, failing the vm entry is
+* _not_ what the processor does but it's basically the
+* only possibility we have.
+*/
+   vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
+   CPU_BASED_TPR_SHADOW);
+   } else {
+   printk("bad virtual-APIC page address\n");
+   dump_vmcs();
}
}
  
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c

index f8054dc1de65..14cacfd7ffd0 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5603,7 +5603,7 @@ static void vmx_dump_dtsel(char *name, uint32_t limit)
   vmcs_readl(limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT));
  }
  
-static void dump_vmcs(void)

+void dump_vmcs(void)
  {
u32 vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS);
u32 vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS);
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index a1e00d0a2482..f879529906b4 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -517,4 +517,6 @@ static inline void decache_tsc_multiplier(struct vcpu_vmx 
*vmx)
vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
  }
  
+void dump_vmcs(void);

+
  #endif /* __KVM_X86_VMX_H */


Reviewed-by: Krish Sadhukhan 


Re: [PATCH] KVM: vmx: print more APICv fields in dump_vmcs

2019-04-16 Thread Krish Sadhukhan




On 04/15/2019 06:35 AM, Paolo Bonzini wrote:

The SVI, RVI, virtual-APIC page address and APIC-access page address fields
were left out of dump_vmcs.  Add them.

Signed-off-by: Paolo Bonzini 
---
  arch/x86/kvm/vmx/vmx.c | 13 +++--
  1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index ab432a930ae8..f8054dc1de65 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5723,8 +5723,17 @@ static void dump_vmcs(void)
if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING)
pr_err("TSC Multiplier = 0x%016llx\n",
   vmcs_read64(TSC_MULTIPLIER));
-   if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW)
-   pr_err("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD));
+   if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW) {
+   if (secondary_exec_control & 
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) {
+   u16 status = vmcs_read16(GUEST_INTR_STATUS);
+   pr_err("SVI|RVI = %02x|%02x ", status >> 8, status & 
0xff);
+   }
+   pr_err(KERN_CONT "TPR Threshold = 0x%02x\n", 
vmcs_read32(TPR_THRESHOLD));
+   if (secondary_exec_control & 
(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+ 
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE))
+   pr_err("APIC-access addr = 0x%016llx ", 
vmcs_read64(APIC_ACCESS_ADDR));
+   pr_err(KERN_CONT "virt-APIC addr=0x%016llx\n", 
vmcs_read64(VIRTUAL_APIC_PAGE_ADDR));
+   }
if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR)
pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV));
if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT))


Reviewed-by: Krish Sadhukhan 


Re: [PATCH] KVM: x86: optimize check for valid PAT value

2019-04-10 Thread Krish Sadhukhan




On 04/10/2019 02:55 AM, Paolo Bonzini wrote:

This check will soon be done on every nested vmentry and vmexit,
"parallelize" it using bitwise operations.

Signed-off-by: Paolo Bonzini 
---
  arch/x86/kvm/mtrr.c| 10 +-
  arch/x86/kvm/vmx/vmx.c |  2 +-
  arch/x86/kvm/x86.h |  8 
  3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index e9ea2d45ae66..9f72cc427158 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -48,11 +48,6 @@ static bool msr_mtrr_valid(unsigned msr)
return false;
  }
  
-static bool valid_pat_type(unsigned t)

-{
-   return t < 8 && (1 << t) & 0xf3; /* 0, 1, 4, 5, 6, 7 */
-}
-
  static bool valid_mtrr_type(unsigned t)
  {
return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */
@@ -67,10 +62,7 @@ bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
return false;
  
  	if (msr == MSR_IA32_CR_PAT) {

-   for (i = 0; i < 8; i++)
-   if (!valid_pat_type((data >> (i * 8)) & 0xff))
-   return false;
-   return true;
+   return kvm_pat_valid(data);
} else if (msr == MSR_MTRRdefType) {
if (data & ~0xcff)
return false;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index b6c533afbf27..b74679732cfc 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1891,7 +1891,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct 
msr_data *msr_info)
break;
case MSR_IA32_CR_PAT:
if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
-   if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
+   if (!kvm_pat_valid(data))
return 1;
vmcs_write64(GUEST_IA32_PAT, data);
vcpu->arch.pat = data;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 28406aa1136d..7bc7ac9d2a44 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -347,4 +347,12 @@ static inline void kvm_after_interrupt(struct kvm_vcpu 
*vcpu)
__this_cpu_write(current_vcpu, NULL);
  }
  
+static inline bool kvm_pat_valid(u64 data)

+{
+   if (data & 0xF8F8F8F8F8F8F8F8)
+   return false;
+   /* 0, 1, 4, 5, 6, 7 are valid values.  */
+   return (data | ((data & 0x0202020202020202) << 1)) == data;
+}
+
  #endif
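
As an aside, the bit trick is easy to verify in isolation.  The standalone
sketch below mirrors kvm_pat_valid() from the patch (illustrative only) and
checks that exactly the PAT types 0, 1, 4, 5, 6 and 7 are accepted in each
byte:

	#include <assert.h>
	#include <stdbool.h>
	#include <stdint.h>

	/* Mirrors kvm_pat_valid() from the patch above. */
	static bool pat_valid(uint64_t data)
	{
		/* Reject any byte with a value >= 8 (one of the upper five bits set). */
		if (data & 0xF8F8F8F8F8F8F8F8ULL)
			return false;
		/*
		 * Of the remaining values 0-7, only 2 and 3 are invalid: they have
		 * bit 1 set but bit 2 clear, so OR-ing bit 1 shifted into bit 2
		 * changes the value.  0, 1, 4, 5, 6, 7 are left unchanged.
		 */
		return (data | ((data & 0x0202020202020202ULL) << 1)) == data;
	}

	int main(void)
	{
		for (unsigned int t = 0; t < 256; t++) {
			bool expected = t == 0 || t == 1 || (t >= 4 && t <= 7);

			/* Put the type under test in the top byte, valid types elsewhere. */
			assert(pat_valid(((uint64_t)t << 56) | 0x0001040506070401ULL) == expected);
		}
		return 0;
	}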


Reviewed-by: Krish Sadhukhan 


Re: [PATCH v2] KVM/nVMX: Do not validate that posted_intr_desc_addr is page aligned

2018-10-22 Thread Krish Sadhukhan




On 10/20/2018 03:50 PM, KarimAllah Ahmed wrote:

The spec only requires the posted interrupt descriptor address to be
64-byte aligned (i.e. bits[0:5] == 0). Using page_address_valid also
forces the address to be page aligned.

Only validate that the address does not cross the maximum physical address
without enforcing a page alignment.

v1 -> v2:
- Add a missing parenthesis (dropped while merging!)

Cc: Paolo Bonzini 
Cc: Radim Krčmář 
Cc: Thomas Gleixner 
Cc: Ingo Molnar 
Cc: Borislav Petkov 
Cc: H. Peter Anvin 
Cc: x...@kernel.org
Cc: k...@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Fixes: 6de84e581c0 ("nVMX x86: check posted-interrupt descriptor addresss on vmentry 
of L2")
Signed-off-by: KarimAllah Ahmed 
---
  arch/x86/kvm/vmx.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 38f1a16..bb0fcdb 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -11667,7 +11667,7 @@ static int nested_vmx_check_apicv_controls(struct 
kvm_vcpu *vcpu,
!nested_exit_intr_ack_set(vcpu) ||
(vmcs12->posted_intr_nv & 0xff00) ||
(vmcs12->posted_intr_desc_addr & 0x3f) ||
-   (!page_address_valid(vcpu, vmcs12->posted_intr_desc_addr)))
+   (vmcs12->posted_intr_desc_addr >> cpuid_maxphyaddr(vcpu)))
return -EINVAL;
  
  	/* tpr shadow is needed by all apicv features. */
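
For illustration, the two conditions kept by the patch, 64-byte alignment
and fitting within the guest's reported MAXPHYADDR, can be written as a
standalone check.  This is a hypothetical helper, not the kernel code, and
it assumes maxphyaddr < 64 as on real CPUs:

	#include <stdbool.h>
	#include <stdint.h>

	static bool pi_desc_addr_valid(uint64_t addr, unsigned int maxphyaddr)
	{
		if (addr & 0x3f)		/* bits 5:0 must be clear (64-byte aligned) */
			return false;
		if (addr >> maxphyaddr)		/* must not exceed MAXPHYADDR */
			return false;
		return true;			/* note: page alignment is NOT required */
	}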


Reviewed-by: Krish Sadhuhan 



Re: [PATCH] x86/kvm/nVMX: avoid redundant double assignment of nested_run_pending

2018-08-24 Thread Krish Sadhukhan




On 08/23/2018 09:24 AM, Vitaly Kuznetsov wrote:

nested_run_pending is set 20 lines above and check_vmentry_prereqs()/
check_vmentry_postreqs() don't seem to be resetting it (the latter, however,
checks it).

Signed-off-by: Vitaly Kuznetsov 
---
  arch/x86/kvm/vmx.c | 3 ---
  1 file changed, 3 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6c3514750d0c..8a63b8cf9458 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -14221,9 +14221,6 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
	    check_vmentry_postreqs(vcpu, vmcs12, &exit_qual))
return -EINVAL;
  
-	if (kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING)

-   vmx->nested.nested_run_pending = 1;
-
vmx->nested.dirty_vmcs12 = true;
ret = enter_vmx_non_root_mode(vcpu, NULL);
if (ret)

Reviewed-by: Krish Sadhukhan 



Re: [PATCH] KVM: vmx: unify adjacent #ifdefs

2018-04-05 Thread Krish Sadhukhan



On 04/04/2018 09:59 AM, Paolo Bonzini wrote:

vmx_save_host_state has multiple ifdefs for CONFIG_X86_64 that have
no other code between them.  Simplify by reducing them to a single
conditional.

Signed-off-by: Paolo Bonzini <pbonz...@redhat.com>
---
  arch/x86/kvm/vmx.c | 10 +++---
  1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 939c8724feb4..a1572461786e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2392,20 +2392,16 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
  #ifdef CONFIG_X86_64
savesegment(ds, vmx->host_state.ds_sel);
savesegment(es, vmx->host_state.es_sel);
-#endif
  
-#ifdef CONFIG_X86_64

vmcs_writel(HOST_FS_BASE, current->thread.fsbase);
vmcs_writel(HOST_GS_BASE, cpu_kernelmode_gs_base(cpu));
-#else
-   vmcs_writel(HOST_FS_BASE, segment_base(vmx->host_state.fs_sel));
-   vmcs_writel(HOST_GS_BASE, segment_base(vmx->host_state.gs_sel));
-#endif
  
-#ifdef CONFIG_X86_64

vmx->msr_host_kernel_gs_base = current->thread.gsbase;
	if (is_long_mode(&vmx->vcpu))
wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+#else
+   vmcs_writel(HOST_FS_BASE, segment_base(vmx->host_state.fs_sel));
+   vmcs_writel(HOST_GS_BASE, segment_base(vmx->host_state.gs_sel));
  #endif
if (boot_cpu_has(X86_FEATURE_MPX))
rdmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);

Reviewed-by: Krish Sadhukhan <krish.sadhuk...@oracle.com>



Re: [PATCH v2 1/4] x86/kvm/cpuid: Fix CPUID function for word 6 (80000001_ECX)

2017-11-06 Thread Krish Sadhukhan

On 11/06/2017 09:44 AM, Janakarajan Natarajan wrote:


The function for CPUID 0x80000001 ECX is set to 0xc0000001. Set it to
0x80000001.

Signed-off-by: Janakarajan Natarajan <janakarajan.natara...@amd.com>
---
  arch/x86/kvm/cpuid.h | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 0bc5c13..b21b1d2 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -43,7 +43,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
[CPUID_8086_0001_EDX] = {0x80860001, 0, CPUID_EDX},
[CPUID_1_ECX] = { 1, 0, CPUID_ECX},
[CPUID_C000_0001_EDX] = {0xc0000001, 0, CPUID_EDX},
-   [CPUID_8000_0001_ECX] = {0xc0000001, 0, CPUID_ECX},
+   [CPUID_8000_0001_ECX] = {0x80000001, 0, CPUID_ECX},
[CPUID_7_0_EBX]   = { 7, 0, CPUID_EBX},
[CPUID_D_1_EAX]   = {   0xd, 1, CPUID_EAX},
[CPUID_F_0_EDX]   = {   0xf, 0, CPUID_EDX},

Reviewed-by: Krish Sadhukhan <krish.sadhuk...@oracle.com>
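
As background, reverse_cpuid[] maps each of KVM's internal feature-word
indices back to the (CPUID function, index, output register) triple the word
was read from, so word 6 must resolve to leaf 0x80000001 rather than the
Centaur leaf 0xc0000001. The standalone sketch below illustrates that reverse
lookup; the enum, struct and table contents are simplified stand-ins invented
for this example, not the kernel's definitions.

#include <stdint.h>
#include <stdio.h>

enum cpuid_output_reg { REG_EAX, REG_EBX, REG_ECX, REG_EDX };

struct cpuid_reg_example {
	uint32_t function;
	uint32_t index;
	enum cpuid_output_reg reg;
};

/* Illustrative reverse-lookup table in the spirit of reverse_cpuid[]. */
static const struct cpuid_reg_example reverse_cpuid_example[] = {
	[0] = { 0x00000001, 0, REG_EDX },	/* word 0: CPUID.1:EDX */
	[6] = { 0x80000001, 0, REG_ECX },	/* word 6: the entry the patch fixes */
};

int main(void)
{
	const struct cpuid_reg_example *e = &reverse_cpuid_example[6];

	/* Word 6 must point at leaf 0x80000001, not 0xc0000001. */
	printf("word 6 -> CPUID.%08XH:%s\n", (unsigned)e->function,
	       e->reg == REG_ECX ? "ECX" : "other");
	return 0;
}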


Re: [PATCH v5 3/3] KVM: nVMX: Fix mmu context after VMLAUNCH/VMRESUME failure

2017-11-03 Thread Krish Sadhukhan

@@ -11539,6 +11546,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 * accordingly.
 */
nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+
+   load_vmcs12_mmu_host_state(vcpu, vmcs12);
+
/*
 * The emulated instruction was already skipped in
     * nested_vmx_run, but the updated RIP was never

Reviewed-by: Krish Sadhukhan <krish.sadhuk...@oracle.com>


Re: [PATCH v5 2/3] KVM: nVMX: Validate the IA32_BNDCFGS on nested VM-entry

2017-11-03 Thread Krish Sadhukhan



On 11/02/2017 11:40 PM, Wanpeng Li wrote:

2017-11-03 14:31 GMT+08:00 Krish Sadhukhan <krish.sadhuk...@oracle.com>:


On 11/02/2017 05:50 PM, Wanpeng Li wrote:

From: Wanpeng Li <wanpeng...@hotmail.com>

According to the SDM, if the "load IA32_BNDCFGS" VM-entry control is 1, the
following checks are performed on the field for the IA32_BNDCFGS MSR:
   - Bits reserved in the IA32_BNDCFGS MSR must be 0.
   - The linear address in bits 63:12 must be canonical.

Reviewed-by: Konrad Rzeszutek Wilk <konrad.w...@oracle.com>
Cc: Paolo Bonzini <pbonz...@redhat.com>
Cc: Radim Krčmář <rkrc...@redhat.com>
Cc: Jim Mattson <jmatt...@google.com>
Signed-off-by: Wanpeng Li <wanpeng...@hotmail.com>
---
v3 -> v4:
   * simplify condition
   * use && instead of nested "if"s

   arch/x86/kvm/vmx.c | 5 +++++
   1 file changed, 5 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e6c8ffa..6cf3972 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -10805,6 +10805,11 @@ static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 return 1;
 }
+	if (kvm_mpx_supported() &&
+		(is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) ||
+		 (vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD)))
+		return 1;
+
 return 0;
   }


Hi Wanpeng,
   The SDM check is performed only when "load IA32_BNDCFGS" VM-entry control
is 1. But vmx_mpx_supported() returns true only when both the "load
IA32_BNDCFGS" VM-entry control and the "clear IA32_BNDCFGS" VM-exit control
are supported. Therefore your check is performed only when both controls are
supported. Did I miss something here?

https://lkml.org/lkml/2017/11/2/748 Paolo asked for this simplification.

Regards,
Wanpeng Li

I got that simplification and your changes look good to me.


However, I am still curious to know why vmx_mpx_supported() returns true
only when both controls are supported, whereas the SDM states the
following:


   "IA32_BNDCFGS (64 bits). This field is supported only on processors 
that support either the 1-setting of the  “load IA32_BNDCFGS” VM-entry 
control or that of the “clear IA32_BNDCFGS” VM-exit control."


Thanks,
Krish
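
For completeness, the standalone sketch below exercises the two SDM checks
quoted in the commit message: reserved bits must be 0 and the base address in
bits 63:12 must be canonical. The reserved-bit mask and the 48-bit canonical
width are assumptions made for this example rather than values taken from the
kernel sources.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BNDCFGS_RSVD_MASK	0x0ffcULL	/* assume bits 11:2 are reserved */
#define BNDCFGS_BASE_MASK	(~0xfffULL)	/* keep bits 63:12 */

/* A 48-bit linear address is canonical if bits 63:47 all equal bit 47. */
static bool is_noncanonical_48(uint64_t addr)
{
	return (uint64_t)(((int64_t)(addr << 16)) >> 16) != addr;
}

static bool bndcfgs_valid(uint64_t bndcfgs)
{
	if (bndcfgs & BNDCFGS_RSVD_MASK)
		return false;		/* reserved bits must be 0 */
	if (is_noncanonical_48(bndcfgs & BNDCFGS_BASE_MASK))
		return false;		/* base must be canonical */
	return true;
}

int main(void)
{
	/* A plausible value with EN set, and one with a reserved bit set. */
	printf("%d %d\n",
	       bndcfgs_valid(0x0000700000000001ULL),
	       bndcfgs_valid(0x0000700000000004ULL));
	return 0;
}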

