Expose the 5-level page table (LA57) feature to the VM and extend
the canonical virtual address check to cover both 48-bit and 57-bit
address widths. This is a prerequisite for supporting a guest that
uses 5-level paging.
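
A canonical address is one whose upper bits are a sign extension of
the implemented width, so the check reduces to sign-extending from
bit (width - 1). A minimal user-space sketch of the idea (example
values only, not part of this patch):

    #include <stdint.h>
    #include <stdio.h>

    /* Sign-extend bit (vaddr_bits - 1) through bit 63, as
     * get_canonical() below does. */
    static uint64_t canonical(uint64_t la, unsigned vaddr_bits)
    {
            return (uint64_t)(((int64_t)la << (64 - vaddr_bits)) >>
                              (64 - vaddr_bits));
    }

    int main(void)
    {
            uint64_t la = 0x0000800000000000ULL;    /* bit 47 set */

            /* Non-canonical at 48 bits: bits 63:48 don't copy bit 47. */
            printf("48-bit ok: %d\n", canonical(la, 48) == la);  /* 0 */
            /* Canonical at 57 bits: bits 63:57 already copy bit 56. */
            printf("57-bit ok: %d\n", canonical(la, 57) == la);  /* 1 */
            return 0;
    }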

Signed-off-by: Liang Li <liang.z...@intel.com>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: Ingo Molnar <mi...@redhat.com>
Cc: Kirill A. Shutemov <kirill.shute...@linux.intel.com>
Cc: Dave Hansen <dave.han...@linux.intel.com>
Cc: Xiao Guangrong <guangrong.x...@linux.intel.com>
Cc: Paolo Bonzini <pbonz...@redhat.com>
Cc: "Radim Kr??m????" <rkrc...@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 12 ++++++------
 arch/x86/kvm/cpuid.c            | 15 +++++++++------
 arch/x86/kvm/emulate.c          | 17 +++++++++++------
 arch/x86/kvm/kvm_cache_regs.h   |  7 ++++++-
 arch/x86/kvm/vmx.c              |  4 ++--
 arch/x86/kvm/x86.c              |  8 ++++++--
 6 files changed, 40 insertions(+), 23 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e505dac..57850b3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -86,8 +86,8 @@
                          | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \
                          | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
                          | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
-                         | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE | X86_CR4_SMAP \
-                         | X86_CR4_PKE))
+                         | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
+                         | X86_CR4_SMAP | X86_CR4_PKE))
 
 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
 
@@ -1269,15 +1269,15 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
        kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
 }
 
-static inline u64 get_canonical(u64 la)
+static inline u64 get_canonical(u64 la, u8 vaddr_bits)
 {
-       return ((int64_t)la << 16) >> 16;
+       return ((int64_t)la << (64 - vaddr_bits)) >> (64 - vaddr_bits);
 }
 
-static inline bool is_noncanonical_address(u64 la)
+static inline bool is_noncanonical_address(u64 la, u8 vaddr_bits)
 {
 #ifdef CONFIG_X86_64
-       return get_canonical(la) != la;
+       return get_canonical(la, vaddr_bits) != la;
 #else
        return false;
 #endif
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index e85f6bd..69e8c1a 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -126,13 +126,16 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
        kvm_x86_ops->fpu_activate(vcpu);
 
        /*
-        * The existing code assumes virtual address is 48-bit in the canonical
-        * address checks; exit if it is ever changed.
+        * The existing code assumes virtual address is 48-bit or 57-bit in the
+        * canonical address checks; exit if it is ever changed.
         */
        best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
-       if (best && ((best->eax & 0xff00) >> 8) != 48 &&
-               ((best->eax & 0xff00) >> 8) != 0)
-               return -EINVAL;
+       if (best) {
+               int vaddr_bits = (best->eax & 0xff00) >> 8;
+
+               if (vaddr_bits != 48 && vaddr_bits != 57 && vaddr_bits != 0)
+                       return -EINVAL;
+       }
 
        /* Update physical-address width */
        vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
@@ -383,7 +386,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 
        /* cpuid 7.0.ecx*/
        const u32 kvm_cpuid_7_0_ecx_x86_features =
-               F(AVX512VBMI) | F(PKU) | 0 /*OSPKE*/;
+               F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/;
 
        /* cpuid 7.0.edx*/
        const u32 kvm_cpuid_7_0_edx_x86_features =
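
The width validated in kvm_update_cpuid() above comes from CPUID leaf
0x80000008: EAX bits 7:0 report the physical-address width and bits
15:8 the linear-address width (48, or 57 when LA57 is supported). A
minimal sketch of the decode (hypothetical helper, not part of this
patch):

    /* Decode address widths from CPUID 0x80000008 EAX. */
    static void decode_leaf_80000008(uint32_t eax, unsigned *phys_bits,
                                     unsigned *virt_bits)
    {
            *phys_bits = eax & 0xff;         /* EAX[7:0]  */
            *virt_bits = (eax >> 8) & 0xff;  /* EAX[15:8] */
    }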
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 56628a4..da01dd7 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -676,6 +676,11 @@ static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size)
        }
 }
 
+static __always_inline u8 virt_addr_bits(struct x86_emulate_ctxt *ctxt)
+{
+       return (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_LA57) ? 57 : 48;
+}
+
 static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
                                       struct segmented_address addr,
                                       unsigned *max_size, unsigned size,
@@ -693,7 +698,7 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
        switch (mode) {
        case X86EMUL_MODE_PROT64:
                *linear = la;
-               if (is_noncanonical_address(la))
+               if (is_noncanonical_address(la, virt_addr_bits(ctxt)))
                        goto bad;
 
-               *max_size = min_t(u64, ~0u, (1ull << 48) - la);
+               *max_size = min_t(u64, ~0u, (1ull << virt_addr_bits(ctxt)) - la);
@@ -1721,7 +1726,7 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
                if (ret != X86EMUL_CONTINUE)
                        return ret;
                if (is_noncanonical_address(get_desc_base(&seg_desc) |
-                                            ((u64)base3 << 32)))
+                               ((u64)base3 << 32), virt_addr_bits(ctxt)))
                        return emulate_gp(ctxt, 0);
        }
 load:
@@ -2796,8 +2801,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
                ss_sel = cs_sel + 8;
                cs.d = 0;
                cs.l = 1;
-               if (is_noncanonical_address(rcx) ||
-                   is_noncanonical_address(rdx))
+               if (is_noncanonical_address(rcx, virt_addr_bits(ctxt)) ||
+                   is_noncanonical_address(rdx, virt_addr_bits(ctxt)))
                        return emulate_gp(ctxt, 0);
                break;
        }
@@ -3712,7 +3717,7 @@ static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
        if (rc != X86EMUL_CONTINUE)
                return rc;
        if (ctxt->mode == X86EMUL_MODE_PROT64 &&
-           is_noncanonical_address(desc_ptr.address))
+           is_noncanonical_address(desc_ptr.address, virt_addr_bits(ctxt)))
                return emulate_gp(ctxt, 0);
        if (lgdt)
                ctxt->ops->set_gdt(ctxt, &desc_ptr);
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 762cdf2..5daf75f 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -4,7 +4,7 @@
 #define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS
 #define KVM_POSSIBLE_CR4_GUEST_BITS                              \
        (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR  \
-        | X86_CR4_OSXMMEXCPT | X86_CR4_PGE)
+        | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_PGE)
 
 static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu,
                                              enum kvm_reg reg)
@@ -78,6 +78,11 @@ static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu)
        return kvm_read_cr4_bits(vcpu, ~0UL);
 }
 
+static inline u8 get_virt_addr_bits(struct kvm_vcpu *vcpu)
+{
+       return kvm_read_cr4_bits(vcpu, X86_CR4_LA57) ? 57 : 48;
+}
+
 static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu)
 {
        return (kvm_register_read(vcpu, VCPU_REGS_RAX) & -1u)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index bfc9f0a..183a53e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -123,7 +123,7 @@
        (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
 #define KVM_CR4_GUEST_OWNED_BITS                                     \
        (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR      \
-        | X86_CR4_OSXMMEXCPT | X86_CR4_TSD)
+        | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD)
 
 #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
 #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
@@ -7017,7 +7017,7 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
                 * non-canonical form. This is the only check on the memory
                 * destination for long mode!
                 */
-               exn = is_noncanonical_address(*ret);
+               exn = is_noncanonical_address(*ret, get_virt_addr_bits(vcpu));
        } else if (is_protmode(vcpu)) {
                /* Protected mode: apply checks for segment validity in the
                 * following order:
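
In long mode the canonical test above is the only check applied to a
VMX instruction's memory operand; segment limits are not consulted.
Sketched under that assumption (hypothetical helper, simplified from
get_vmx_mem_address()):

    /* Long-mode check for a VMX-instruction memory operand.  The
     * linear address is segment base plus effective address; only
     * FS/GS contribute a non-zero base in 64-bit mode. */
    static bool vmx_operand_is_noncanonical(u64 seg_base, u64 offset,
                                            u8 vaddr_bits)
    {
            u64 la = seg_base + offset;

            return get_canonical(la, vaddr_bits) != la;  /* #GP(0) if true */
    }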
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 51ccfe0..b935658 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -762,6 +762,9 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
        if (!guest_cpuid_has_pku(vcpu) && (cr4 & X86_CR4_PKE))
                return 1;
 
+       if (!guest_cpuid_has_la57(vcpu) && (cr4 & X86_CR4_LA57))
+               return 1;
+
        if (is_long_mode(vcpu)) {
                if (!(cr4 & X86_CR4_PAE))
                        return 1;
@@ -1074,7 +1077,8 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
        case MSR_KERNEL_GS_BASE:
        case MSR_CSTAR:
        case MSR_LSTAR:
-               if (is_noncanonical_address(msr->data))
+               if (is_noncanonical_address(msr->data,
+                               get_virt_addr_bits(vcpu)))
                        return 1;
                break;
        case MSR_IA32_SYSENTER_EIP:
@@ -1091,7 +1095,7 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
                 * value, and that something deterministic happens if the guest
                 * invokes 64-bit SYSENTER.
                 */
-               msr->data = get_canonical(msr->data);
+               msr->data = get_canonical(msr->data, get_virt_addr_bits(vcpu));
        }
        return kvm_x86_ops->set_msr(vcpu, msr);
 }
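
For the SYSENTER MSRs the write is canonicalized rather than
rejected; a worked example at the 48-bit width (illustrative value
only):

    u64 data = 0xAAAA800000000000ULL;  /* non-canonical at 48 bits */

    data = get_canonical(data, 48);    /* sign-extends bit 47 upward */
    /* data is now 0xFFFF800000000000ULL: canonical, so a later
     * 64-bit SYSENTER behaves deterministically. */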
-- 
1.9.1
