From: Tom Lendacky <[email protected]>

Provide initial support for accessing the GHCB when needing to access
registers for an SEV-ES guest. The support consists of:

  - Accessing the GHCB instead of the VMSA when reading and writing
    guest registers (after the VMSA has been encrypted).
  - Creating register access override functions for reading and writing
    guest registers from the common KVM support.
  - Allocating pages for the VMSA and GHCB when creating each vCPU:
    - The VMSA page holds the encrypted VMSA for the vCPU.
    - The GHCB page is used to hold a copy of the guest GHCB during
      VMGEXIT processing.

Signed-off-by: Tom Lendacky <[email protected]>
---
 arch/x86/include/asm/kvm_host.h  |   7 ++
 arch/x86/include/asm/msr-index.h |   1 +
 arch/x86/kvm/kvm_cache_regs.h    |  30 +++++--
 arch/x86/kvm/svm/svm.c           | 138 ++++++++++++++++++++++++++++++-
 arch/x86/kvm/svm/svm.h           |  65 ++++++++++++++-
 5 files changed, 230 insertions(+), 11 deletions(-)
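
Illustrative note (not part of the patch; this region is discarded by git am):
the sketch below shows the shape of the accessor-override flow that the
kvm_cache_regs.h change introduces. A read gives the vendor hook a chance to
refresh the cached register from its backing store (VMSA or GHCB) before the
value is returned, and a write notifies the hook so it can mirror the value
and track validity. All names here (demo_vcpu, demo_ops, demo_register_read,
and so on) are invented stand-ins for this sketch, not KVM symbols.

/* Illustrative only: simplified stand-ins, not the real KVM types or symbols. */
#include <stdio.h>

enum demo_reg { DEMO_REG_RAX, DEMO_REG_RBX, NR_DEMO_REGS };

struct demo_vcpu {
	unsigned long regs[NR_DEMO_REGS];	/* cached register file */
	unsigned long save_area[NR_DEMO_REGS];	/* stands in for the VMSA/GHCB */
};

struct demo_ops {
	/* Optional vendor hooks, in the spirit of reg_read/write_override. */
	void (*reg_read_override)(struct demo_vcpu *vcpu, enum demo_reg reg);
	void (*reg_write_override)(struct demo_vcpu *vcpu, enum demo_reg reg,
				   unsigned long val);
};

/* Example hooks: keep the cached registers and the save area in sync. */
static void demo_read_override(struct demo_vcpu *vcpu, enum demo_reg reg)
{
	vcpu->regs[reg] = vcpu->save_area[reg];
}

static void demo_write_override(struct demo_vcpu *vcpu, enum demo_reg reg,
				unsigned long val)
{
	vcpu->save_area[reg] = val;
}

static struct demo_ops demo_x86_ops = {
	.reg_read_override  = demo_read_override,
	.reg_write_override = demo_write_override,
};

static unsigned long demo_register_read(struct demo_vcpu *vcpu, enum demo_reg reg)
{
	/* Let the vendor hook refresh the cached value before it is consumed. */
	if (demo_x86_ops.reg_read_override)
		demo_x86_ops.reg_read_override(vcpu, reg);

	return vcpu->regs[reg];
}

static void demo_register_write(struct demo_vcpu *vcpu, enum demo_reg reg,
				unsigned long val)
{
	/* Let the vendor hook mirror the value into its save area first. */
	if (demo_x86_ops.reg_write_override)
		demo_x86_ops.reg_write_override(vcpu, reg, val);

	vcpu->regs[reg] = val;
}

int main(void)
{
	struct demo_vcpu vcpu = { 0 };

	demo_register_write(&vcpu, DEMO_REG_RAX, 0x1234);
	printf("rax = %#lx\n", demo_register_read(&vcpu, DEMO_REG_RAX));
	return 0;
}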

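A second stand-alone sketch, again illustrative only and built on an invented
demo_save_area layout rather than the real struct vmcb_save_area: it shows the
bookkeeping used below, where a register's save-area entry is its byte offset
divided by sizeof(u64), and that entry number selects a bit in a validity
bitmap using the same entry / 8 and entry % 8 split as svm_reg_write_override.

/* Illustrative only: an invented layout, not the real struct vmcb_save_area. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct demo_save_area {
	uint64_t rax;
	uint64_t rbx;
	uint64_t rcx;
	uint8_t  valid_bitmap[16];	/* one bit per quadword entry */
};

/* A field's entry number is its byte offset in quadwords (cf. VMSA_REG_ENTRY). */
#define DEMO_REG_ENTRY(_field) \
	(offsetof(struct demo_save_area, _field) / sizeof(uint64_t))

static void demo_set_valid(struct demo_save_area *sa, unsigned int entry)
{
	/* Same index/shift split as the bitmap update in svm_reg_write_override. */
	sa->valid_bitmap[entry / 8] |= 1u << (entry % 8);
}

static int demo_is_valid(const struct demo_save_area *sa, unsigned int entry)
{
	return (sa->valid_bitmap[entry / 8] >> (entry % 8)) & 1;
}

int main(void)
{
	struct demo_save_area sa = { 0 };
	unsigned int entry = DEMO_REG_ENTRY(rbx);	/* quadword entry 1 */

	/* Write through the u64-array view of the save area, then mark it valid. */
	((uint64_t *)&sa)[entry] = 0xabcd;
	demo_set_valid(&sa, entry);

	printf("rbx: entry=%u valid=%d value=%#llx\n", entry,
	       demo_is_valid(&sa, entry), (unsigned long long)sa.rbx);
	return 0;
}
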
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 5303dbc5c9bc..c900992701d6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -788,6 +788,9 @@ struct kvm_vcpu_arch {
 
        /* AMD MSRC001_0015 Hardware Configuration */
        u64 msr_hwcr;
+
+       /* SEV-ES support */
+       bool vmsa_encrypted;
 };
 
 struct kvm_lpage_info {
@@ -1227,6 +1230,10 @@ struct kvm_x86_ops {
        int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
 
        void (*migrate_timers)(struct kvm_vcpu *vcpu);
+
+       void (*reg_read_override)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
+       void (*reg_write_override)(struct kvm_vcpu *vcpu, enum kvm_reg reg,
+                                  unsigned long val);
 };
 
 struct kvm_x86_nested_ops {
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 249a4147c4b2..16f5b20bb099 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -466,6 +466,7 @@
 #define MSR_AMD64_IBSBRTARGET          0xc001103b
 #define MSR_AMD64_IBSOPDATA4           0xc001103d
 #define MSR_AMD64_IBS_REG_COUNT_MAX    8 /* includes MSR_AMD64_IBSBRTARGET */
+#define MSR_AMD64_VM_PAGE_FLUSH        0xc001011e
 #define MSR_AMD64_SEV_ES_GHCB          0xc0010130
 #define MSR_AMD64_SEV                  0xc0010131
 #define MSR_AMD64_SEV_ENABLED_BIT      0
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index cfe83d4ae625..e87eb90999d5 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -9,15 +9,21 @@
        (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR  \
         | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_PGE | X86_CR4_TSD)
 
-#define BUILD_KVM_GPR_ACCESSORS(lname, uname)                                \
-static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu)\
-{                                                                            \
-       return vcpu->arch.regs[VCPU_REGS_##uname];                            \
-}                                                                            \
-static __always_inline void kvm_##lname##_write(struct kvm_vcpu *vcpu,       \
-                                               unsigned long val)            \
-{                                                                            \
-       vcpu->arch.regs[VCPU_REGS_##uname] = val;                             \
+#define BUILD_KVM_GPR_ACCESSORS(lname, uname)                                 \
+static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu)\
+{                                                                             \
+       if (kvm_x86_ops.reg_read_override)                                     \
+               kvm_x86_ops.reg_read_override(vcpu, VCPU_REGS_##uname);        \
+                                                                              \
+       return vcpu->arch.regs[VCPU_REGS_##uname];                             \
+}                                                                             \
+static __always_inline void kvm_##lname##_write(struct kvm_vcpu *vcpu,        \
+                                               unsigned long val)             \
+{                                                                             \
+       if (kvm_x86_ops.reg_write_override)                                    \
+               kvm_x86_ops.reg_write_override(vcpu, VCPU_REGS_##uname, val);  \
+                                                                              \
+       vcpu->arch.regs[VCPU_REGS_##uname] = val;                              \
 }
 BUILD_KVM_GPR_ACCESSORS(rax, RAX)
 BUILD_KVM_GPR_ACCESSORS(rbx, RBX)
@@ -67,6 +73,9 @@ static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, int reg)
        if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS))
                return 0;
 
+       if (kvm_x86_ops.reg_read_override)
+               kvm_x86_ops.reg_read_override(vcpu, reg);
+
        if (!kvm_register_is_available(vcpu, reg))
                kvm_x86_ops.cache_reg(vcpu, reg);
 
@@ -79,6 +88,9 @@ static inline void kvm_register_write(struct kvm_vcpu *vcpu, int reg,
        if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS))
                return;
 
+       if (kvm_x86_ops.reg_write_override)
+               kvm_x86_ops.reg_write_override(vcpu, reg, val);
+
        vcpu->arch.regs[reg] = val;
        kvm_register_mark_dirty(vcpu, reg);
 }
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 779c167e42cc..d1f52211627a 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1175,6 +1175,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
        struct page *msrpm_pages;
        struct page *hsave_page;
        struct page *nested_msrpm_pages;
+       struct page *vmsa_page = NULL;
        int err;
 
        BUILD_BUG_ON(offsetof(struct vcpu_svm, vcpu) != 0);
@@ -1197,9 +1198,19 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
        if (!hsave_page)
                goto free_page3;
 
+       if (sev_es_guest(svm->vcpu.kvm)) {
+               /*
+                * SEV-ES guests require a separate VMSA page used to contain
+                * the encrypted register state of the guest.
+                */
+               vmsa_page = alloc_page(GFP_KERNEL);
+               if (!vmsa_page)
+                       goto free_page4;
+       }
+
        err = avic_init_vcpu(svm);
        if (err)
-               goto free_page4;
+               goto free_page5;
 
        /* We initialize this flag to true to make sure that the is_running
         * bit would be set the first time the vcpu is loaded.
@@ -1219,6 +1230,12 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
        svm->vmcb = page_address(page);
        clear_page(svm->vmcb);
        svm->vmcb_pa = __sme_set(page_to_pfn(page) << PAGE_SHIFT);
+
+       if (vmsa_page) {
+               svm->vmsa = page_address(vmsa_page);
+               clear_page(svm->vmsa);
+       }
+
        svm->asid_generation = 0;
        init_vmcb(svm);
 
@@ -1227,6 +1244,9 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 
        return 0;
 
+free_page5:
+       if (vmsa_page)
+               __free_page(vmsa_page);
 free_page4:
        __free_page(hsave_page);
 free_page3:
@@ -1258,6 +1278,26 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
         */
        svm_clear_current_vmcb(svm->vmcb);
 
+       if (sev_es_guest(vcpu->kvm)) {
+               struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info;
+
+               if (vcpu->arch.vmsa_encrypted) {
+                       u64 page_to_flush;
+
+                       /*
+                        * The VMSA page was used by hardware to hold guest
+                        * encrypted state; be sure to flush it before returning
+                        * it to the system. This is done using the VM Page
+                        * Flush MSR (which takes the page virtual address and
+                        * guest ASID).
+                        */
+                       page_to_flush = (u64)svm->vmsa | sev->asid;
+                       wrmsrl(MSR_AMD64_VM_PAGE_FLUSH, page_to_flush);
+               }
+
+               __free_page(virt_to_page(svm->vmsa));
+       }
+
        __free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT));
        __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
        __free_page(virt_to_page(svm->nested.hsave));
@@ -4012,6 +4052,99 @@ static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
                   (svm->vmcb->control.intercept & (1ULL << INTERCEPT_INIT));
 }
 
+/*
+ * These return values represent the offset in quad words within the VM save
+ * area. This allows them to be accessed by casting the save area to a u64
+ * array.
+ */
+#define VMSA_REG_ENTRY(_field)  (offsetof(struct vmcb_save_area, _field) / sizeof(u64))
+#define VMSA_REG_UNDEF          VMSA_REG_ENTRY(valid_bitmap)
+static inline unsigned int vcpu_to_vmsa_entry(enum kvm_reg reg)
+{
+       switch (reg) {
+       case VCPU_REGS_RAX:     return VMSA_REG_ENTRY(rax);
+       case VCPU_REGS_RBX:     return VMSA_REG_ENTRY(rbx);
+       case VCPU_REGS_RCX:     return VMSA_REG_ENTRY(rcx);
+       case VCPU_REGS_RDX:     return VMSA_REG_ENTRY(rdx);
+       case VCPU_REGS_RSP:     return VMSA_REG_ENTRY(rsp);
+       case VCPU_REGS_RBP:     return VMSA_REG_ENTRY(rbp);
+       case VCPU_REGS_RSI:     return VMSA_REG_ENTRY(rsi);
+       case VCPU_REGS_RDI:     return VMSA_REG_ENTRY(rdi);
+#ifdef CONFIG_X86_64
+       case VCPU_REGS_R8:      return VMSA_REG_ENTRY(r8);
+       case VCPU_REGS_R9:      return VMSA_REG_ENTRY(r9);
+       case VCPU_REGS_R10:     return VMSA_REG_ENTRY(r10);
+       case VCPU_REGS_R11:     return VMSA_REG_ENTRY(r11);
+       case VCPU_REGS_R12:     return VMSA_REG_ENTRY(r12);
+       case VCPU_REGS_R13:     return VMSA_REG_ENTRY(r13);
+       case VCPU_REGS_R14:     return VMSA_REG_ENTRY(r14);
+       case VCPU_REGS_R15:     return VMSA_REG_ENTRY(r15);
+#endif
+       case VCPU_REGS_RIP:     return VMSA_REG_ENTRY(rip);
+       default:
+               WARN_ONCE(1, "unsupported VCPU to VMSA register conversion\n");
+               return VMSA_REG_UNDEF;
+       }
+}
+
+/* For SEV-ES guests, populate the vCPU register from the appropriate VMSA/GHCB */
+static void svm_reg_read_override(struct kvm_vcpu *vcpu, enum kvm_reg reg)
+{
+       struct vmcb_save_area *vmsa;
+       struct vcpu_svm *svm;
+       unsigned int entry;
+       unsigned long val;
+       u64 *vmsa_reg;
+
+       if (!sev_es_guest(vcpu->kvm))
+               return;
+
+       entry = vcpu_to_vmsa_entry(reg);
+       if (entry == VMSA_REG_UNDEF)
+               return;
+
+       svm = to_svm(vcpu);
+       vmsa = get_vmsa(svm);
+       vmsa_reg = (u64 *)vmsa;
+       val = (unsigned long)vmsa_reg[entry];
+
+       /* If a GHCB is mapped, check the bitmap of valid entries */
+       if (svm->ghcb) {
+               if (!test_bit(entry, (unsigned long *)vmsa->valid_bitmap))
+                       val = 0;
+       }
+
+       vcpu->arch.regs[reg] = val;
+}
+
+/* For SEV-ES guests, set the vCPU register in the appropriate VMSA */
+static void svm_reg_write_override(struct kvm_vcpu *vcpu, enum kvm_reg reg,
+                                  unsigned long val)
+{
+       struct vmcb_save_area *vmsa;
+       struct vcpu_svm *svm;
+       unsigned int entry;
+       u64 *vmsa_reg;
+
+       entry = vcpu_to_vmsa_entry(reg);
+       if (entry == VMSA_REG_UNDEF)
+               return;
+
+       svm = to_svm(vcpu);
+       vmsa = get_vmsa(svm);
+       vmsa_reg = (u64 *)vmsa;
+
+       /* If a GHCB is mapped, set the bit to indicate a valid entry */
+       if (svm->ghcb) {
+               unsigned int index = entry / 8;
+               unsigned int shift = entry % 8;
+
+               vmsa->valid_bitmap[index] |= BIT(shift);
+       }
+
+       vmsa_reg[entry] = val;
+}
+
 static void svm_vm_destroy(struct kvm *kvm)
 {
        avic_vm_destroy(kvm);
@@ -4150,6 +4283,9 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
        .need_emulation_on_page_fault = svm_need_emulation_on_page_fault,
 
        .apic_init_signal_blocked = svm_apic_init_signal_blocked,
+
+       .reg_read_override = svm_reg_read_override,
+       .reg_write_override = svm_reg_write_override,
 };
 
 static struct kvm_x86_init_ops svm_init_ops __initdata = {
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index f42ba9d158df..ff587536f571 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -159,6 +159,10 @@ struct vcpu_svm {
         */
        struct list_head ir_list;
        spinlock_t ir_list_lock;
+
+       /* SEV-ES support */
+       struct vmcb_save_area *vmsa;
+       struct ghcb *ghcb;
 };
 
 struct svm_cpu_data {
@@ -509,9 +513,34 @@ void sev_hardware_teardown(void);
 
 static inline struct vmcb_save_area *get_vmsa(struct vcpu_svm *svm)
 {
-       return &svm->vmcb->save;
+       struct vmcb_save_area *vmsa;
+
+       if (sev_es_guest(svm->vcpu.kvm)) {
+               /*
+                * Before LAUNCH_UPDATE_VMSA, use the actual SEV-ES save area
+                * to construct the initial state.  Afterwards, use the mapped
+                * GHCB in a VMGEXIT or the traditional save area as a scratch
+                * area when outside of a VMGEXIT.
+                */
+               if (svm->vcpu.arch.vmsa_encrypted) {
+                       if (svm->ghcb)
+                               vmsa = &svm->ghcb->save;
+                       else
+                               vmsa = &svm->vmcb->save;
+               } else {
+                       vmsa = svm->vmsa;
+               }
+       } else {
+               vmsa = &svm->vmcb->save;
+       }
+
+       return vmsa;
 }
 
+#define SEV_ES_SET_VALID(_vmsa, _field)                                 \
+       __set_bit(GHCB_BITMAP_IDX(_field),                              \
+                 (unsigned long *)(_vmsa)->valid_bitmap)
+
 #define DEFINE_VMSA_SEGMENT_ENTRY(_field, _entry, _size)               \
        static inline _size                                             \
        svm_##_field##_read_##_entry(struct vcpu_svm *svm)              \
@@ -528,6 +557,9 @@ static inline struct vmcb_save_area *get_vmsa(struct vcpu_svm *svm)
                struct vmcb_save_area *vmsa = get_vmsa(svm);            \
                                                                        \
                vmsa->_field._entry = value;                            \
+               if (svm->vcpu.arch.vmsa_encrypted) {                    \
+                       SEV_ES_SET_VALID(vmsa, _field);                 \
+               }                                                       \
        }                                                               \
 
 #define DEFINE_VMSA_SEGMENT_ACCESSOR(_field)                           \
@@ -551,6 +583,9 @@ static inline struct vmcb_save_area *get_vmsa(struct vcpu_svm *svm)
                struct vmcb_save_area *vmsa = get_vmsa(svm);            \
                                                                        \
                vmsa->_field = *seg;                                    \
+               if (svm->vcpu.arch.vmsa_encrypted) {                    \
+                       SEV_ES_SET_VALID(vmsa, _field);                 \
+               }                                                       \
        }
 
 DEFINE_VMSA_SEGMENT_ACCESSOR(cs)
@@ -579,6 +614,9 @@ DEFINE_VMSA_SEGMENT_ACCESSOR(tr)
                struct vmcb_save_area *vmsa = get_vmsa(svm);            \
                                                                        \
                vmsa->_field = value;                                   \
+               if (svm->vcpu.arch.vmsa_encrypted) {                    \
+                       SEV_ES_SET_VALID(vmsa, _field);                 \
+               }                                                       \
        }                                                               \
                                                                        \
        static inline void                                              \
@@ -587,6 +625,9 @@ DEFINE_VMSA_SEGMENT_ACCESSOR(tr)
                struct vmcb_save_area *vmsa = get_vmsa(svm);            \
                                                                        \
                vmsa->_field &= value;                                  \
+               if (svm->vcpu.arch.vmsa_encrypted) {                    \
+                       SEV_ES_SET_VALID(vmsa, _field);                 \
+               }                                                       \
        }                                                               \
                                                                        \
        static inline void                                              \
@@ -595,6 +636,9 @@ DEFINE_VMSA_SEGMENT_ACCESSOR(tr)
                struct vmcb_save_area *vmsa = get_vmsa(svm);            \
                                                                        \
                vmsa->_field |= value;                                  \
+               if (svm->vcpu.arch.vmsa_encrypted) {                    \
+                       SEV_ES_SET_VALID(vmsa, _field);                 \
+               }                                                       \
        }
 
 #define DEFINE_VMSA_ACCESSOR(_field)                                   \
@@ -629,6 +673,25 @@ DEFINE_VMSA_ACCESSOR(last_excp_to)
 DEFINE_VMSA_U8_ACCESSOR(cpl)
 DEFINE_VMSA_ACCESSOR(rip)
 DEFINE_VMSA_ACCESSOR(rax)
+DEFINE_VMSA_ACCESSOR(rbx)
+DEFINE_VMSA_ACCESSOR(rcx)
+DEFINE_VMSA_ACCESSOR(rdx)
 DEFINE_VMSA_ACCESSOR(rsp)
+DEFINE_VMSA_ACCESSOR(rbp)
+DEFINE_VMSA_ACCESSOR(rsi)
+DEFINE_VMSA_ACCESSOR(rdi)
+DEFINE_VMSA_ACCESSOR(r8)
+DEFINE_VMSA_ACCESSOR(r9)
+DEFINE_VMSA_ACCESSOR(r10)
+DEFINE_VMSA_ACCESSOR(r11)
+DEFINE_VMSA_ACCESSOR(r12)
+DEFINE_VMSA_ACCESSOR(r13)
+DEFINE_VMSA_ACCESSOR(r14)
+DEFINE_VMSA_ACCESSOR(r15)
+DEFINE_VMSA_ACCESSOR(sw_exit_code)
+DEFINE_VMSA_ACCESSOR(sw_exit_info_1)
+DEFINE_VMSA_ACCESSOR(sw_exit_info_2)
+DEFINE_VMSA_ACCESSOR(sw_scratch)
+DEFINE_VMSA_ACCESSOR(xcr0)
 
 #endif
-- 
2.28.0
