We implement a simple VMID allocator for Guests/VMs which:
1. Detects the number of VMID bits at boot-time
2. Uses a global VMID version (written under a spinlock, read
   locklessly) and increments it whenever we run out of VMIDs
3. Flushes Guest TLBs on all host CPUs whenever we run out
   of VMIDs
4. Forces an update of the HW Stage2 VMID for each Guest VCPU
   whenever the VMID changes, using VCPU request KVM_REQ_UPDATE_HGATP

Signed-off-by: Anup Patel <anup.pa...@wdc.com>
Acked-by: Paolo Bonzini <pbonz...@redhat.com>
Reviewed-by: Paolo Bonzini <pbonz...@redhat.com>
Reviewed-by: Alexander Graf <g...@amazon.com>
---
 arch/riscv/include/asm/kvm_host.h |  25 ++++++
 arch/riscv/kvm/Makefile           |   3 +-
 arch/riscv/kvm/main.c             |   4 +
 arch/riscv/kvm/tlb.S              |  43 +++++++++++
 arch/riscv/kvm/vcpu.c             |   9 +++
 arch/riscv/kvm/vm.c               |   6 ++
 arch/riscv/kvm/vmid.c             | 123 ++++++++++++++++++++++++++++++
 7 files changed, 212 insertions(+), 1 deletion(-)
 create mode 100644 arch/riscv/kvm/tlb.S
 create mode 100644 arch/riscv/kvm/vmid.c

diff --git a/arch/riscv/include/asm/kvm_host.h 
b/arch/riscv/include/asm/kvm_host.h
index 7041d2a9304a..9410468678ae 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -27,6 +27,7 @@
 #define KVM_REQ_SLEEP \
        KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_VCPU_RESET             KVM_ARCH_REQ(1)
+#define KVM_REQ_UPDATE_HGATP           KVM_ARCH_REQ(2)
 
 struct kvm_vm_stat {
        ulong remote_tlb_flush;
@@ -47,7 +48,19 @@ struct kvm_vcpu_stat {
 struct kvm_arch_memory_slot {
 };
 
+/* Per-VM stage2 VMID state; embedded in struct kvm_arch. */
+struct kvm_vmid {
+       /*
+        * Writes to vmid_version and vmid happen with vmid_lock held
+        * whereas reads happen without any lock held.
+        *
+        * vmid_version: version of the global allocator under which
+        *               @vmid was assigned; initialised to 0 (invalid).
+        * vmid:         VMID value assigned to this VM by the allocator.
+        */
+       unsigned long vmid_version;
+       unsigned long vmid;
+};
+
 struct kvm_arch {
+       /* stage2 vmid */
+       struct kvm_vmid vmid;
+
        /* stage2 page table */
        pgd_t *pgd;
        phys_addr_t pgd_phys;
@@ -170,6 +183,12 @@ static inline void kvm_arch_vcpu_block_finish(struct 
kvm_vcpu *vcpu) {}
 int kvm_riscv_setup_vsip(void);
 void kvm_riscv_cleanup_vsip(void);
 
+void __kvm_riscv_hfence_gvma_vmid_gpa(unsigned long vmid,
+                                     unsigned long gpa);
+void __kvm_riscv_hfence_gvma_vmid(unsigned long vmid);
+void __kvm_riscv_hfence_gvma_gpa(unsigned long gpa);
+void __kvm_riscv_hfence_gvma_all(void);
+
 int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long hva,
                         bool is_write);
 void kvm_riscv_stage2_flush_cache(struct kvm_vcpu *vcpu);
@@ -177,6 +196,12 @@ int kvm_riscv_stage2_alloc_pgd(struct kvm *kvm);
 void kvm_riscv_stage2_free_pgd(struct kvm *kvm);
 void kvm_riscv_stage2_update_hgatp(struct kvm_vcpu *vcpu);
 
+void kvm_riscv_stage2_vmid_detect(void);
+unsigned long kvm_riscv_stage2_vmid_bits(void);
+int kvm_riscv_stage2_vmid_init(struct kvm *kvm);
+bool kvm_riscv_stage2_vmid_ver_changed(struct kvm_vmid *vmid);
+void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu);
+
 void __kvm_riscv_unpriv_trap(void);
 
 unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu,
diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
index 845579273727..c0f57f26c13d 100644
--- a/arch/riscv/kvm/Makefile
+++ b/arch/riscv/kvm/Makefile
@@ -8,6 +8,7 @@ ccflags-y := -Ivirt/kvm -Iarch/riscv/kvm
 
 kvm-objs := $(common-objs-y)
 
-kvm-objs += main.o vm.o mmu.o vcpu.o vcpu_exit.o vcpu_switch.o
+kvm-objs += main.o vm.o vmid.o tlb.o mmu.o
+kvm-objs += vcpu.o vcpu_exit.o vcpu_switch.o
 
 obj-$(CONFIG_KVM)      += kvm.o
diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c
index d088247843c5..55df85184241 100644
--- a/arch/riscv/kvm/main.c
+++ b/arch/riscv/kvm/main.c
@@ -72,8 +72,12 @@ int kvm_arch_init(void *opaque)
        if (ret)
                return ret;
 
+       kvm_riscv_stage2_vmid_detect();
+
        kvm_info("hypervisor extension available\n");
 
+       kvm_info("host has %ld VMID bits\n", kvm_riscv_stage2_vmid_bits());
+
        return 0;
 }
 
diff --git a/arch/riscv/kvm/tlb.S b/arch/riscv/kvm/tlb.S
new file mode 100644
index 000000000000..453fca8d7940
--- /dev/null
+++ b/arch/riscv/kvm/tlb.S
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ *     Anup Patel <anup.pa...@wdc.com>
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+
+       .text
+       .altmacro
+       .option norelax
+
+       /*
+        * Instruction encoding of hfence.gvma is:
+        * 0110001 rs2(5) rs1(5) 000 00000 1110011
+        */
+
+ENTRY(__kvm_riscv_hfence_gvma_vmid_gpa)
+       /*
+        * C prototype: (unsigned long vmid, unsigned long gpa)
+        * rs1 = a1 (gpa)
+        * rs2 = a0 (vmid)
+        * hfence.gvma a1, a0
+        * 0110001 01010 01011 000 00000 1110011
+        */
+       .word 0x62a58073
+       ret
+ENDPROC(__kvm_riscv_hfence_gvma_vmid_gpa)
+
+ENTRY(__kvm_riscv_hfence_gvma_vmid)
+       /*
+        * C prototype: (unsigned long vmid)
+        * rs1 = zero
+        * rs2 = a0 (vmid)
+        * hfence.gvma zero, a0
+        * 0110001 01010 00000 000 00000 1110011
+        */
+       .word 0x62a00073
+       ret
+ENDPROC(__kvm_riscv_hfence_gvma_vmid)
+
+ENTRY(__kvm_riscv_hfence_gvma_gpa)
+       /*
+        * C prototype: (unsigned long gpa)
+        * rs1 = a0 (gpa)
+        * rs2 = zero
+        * hfence.gvma a0
+        * 0110001 00000 01010 000 00000 1110011
+        */
+       .word 0x62050073
+       ret
+ENDPROC(__kvm_riscv_hfence_gvma_gpa)
+
+ENTRY(__kvm_riscv_hfence_gvma_all)
+       /*
+        * C prototype: (void)
+        * rs1 = zero
+        * rs2 = zero
+        * hfence.gvma
+        * 0110001 00000 00000 000 00000 1110011
+        */
+       .word 0x62000073
+       ret
+ENDPROC(__kvm_riscv_hfence_gvma_all)
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index 9a720a08675e..36957802fed4 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -640,6 +640,12 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu 
*vcpu)
 
                if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
                        kvm_riscv_reset_vcpu(vcpu);
+
+               if (kvm_check_request(KVM_REQ_UPDATE_HGATP, vcpu))
+                       kvm_riscv_stage2_update_hgatp(vcpu);
+
+               if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
+                       __kvm_riscv_hfence_gvma_all();
        }
 }
 
@@ -702,6 +708,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
                /* Check conditions before entering the guest */
                cond_resched();
 
+               kvm_riscv_stage2_vmid_update(vcpu);
+
                kvm_riscv_check_vcpu_requests(vcpu);
 
                preempt_disable();
@@ -738,6 +746,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
                kvm_riscv_update_vsip(vcpu);
 
                if (ret <= 0 ||
+                   kvm_riscv_stage2_vmid_ver_changed(&vcpu->kvm->arch.vmid) ||
                    kvm_request_pending(vcpu)) {
                        vcpu->mode = OUTSIDE_GUEST_MODE;
                        local_irq_enable();
diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
index ac0211820521..c5aab5478c38 100644
--- a/arch/riscv/kvm/vm.c
+++ b/arch/riscv/kvm/vm.c
@@ -26,6 +26,12 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
        if (r)
                return r;
 
+       r = kvm_riscv_stage2_vmid_init(kvm);
+       if (r) {
+               kvm_riscv_stage2_free_pgd(kvm);
+               return r;
+       }
+
        return 0;
 }
 
diff --git a/arch/riscv/kvm/vmid.c b/arch/riscv/kvm/vmid.c
new file mode 100644
index 000000000000..69f770fa4f46
--- /dev/null
+++ b/arch/riscv/kvm/vmid.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ *     Anup Patel <anup.pa...@wdc.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/cpumask.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/kvm_host.h>
+#include <asm/csr.h>
+
+/*
+ * Global VMID allocator state.
+ *
+ * vmid_version: current VMID version. Starts at 1, while a new VM is
+ *               initialised with version 0. Written with vmid_lock held,
+ *               read locklessly with READ_ONCE().
+ * vmid_next:    next VMID to hand out; 0 means a new VMID version has to
+ *               be started before allocating. Accessed with vmid_lock held.
+ * vmid_bits:    number of VMID bits in use, set once by
+ *               kvm_riscv_stage2_vmid_detect(); 0 means VMIDs are not used.
+ * vmid_lock:    serialises allocation in kvm_riscv_stage2_vmid_update().
+ */
+static unsigned long vmid_version = 1;
+static unsigned long vmid_next;
+static unsigned long vmid_bits;
+static DEFINE_SPINLOCK(vmid_lock);
+
+/*
+ * Probe how many VMID bits the HGATP CSR implements and record the result
+ * in vmid_bits. HGATP is restored to its previous value before returning.
+ */
+void kvm_riscv_stage2_vmid_detect(void)
+{
+       unsigned long old;
+
+       /*
+        * Figure-out number of VMID bits in HW: set every bit of the VMID
+        * field, read the CSR back and take the position of the highest
+        * bit that is still set.
+        */
+       old = csr_read(CSR_HGATP);
+       csr_write(CSR_HGATP, old | HGATP_VMID_MASK);
+       vmid_bits = csr_read(CSR_HGATP);
+       vmid_bits = (vmid_bits & HGATP_VMID_MASK) >> HGATP_VMID_SHIFT;
+       vmid_bits = fls_long(vmid_bits);
+       csr_write(CSR_HGATP, old);
+
+       /* We polluted local TLB so flush all guest TLB */
+       __kvm_riscv_hfence_gvma_all();
+
+       /*
+        * We don't use VMID bits if they are not sufficient, i.e. when
+        * there are fewer VMIDs than possible CPUs.
+        */
+       if ((1UL << vmid_bits) < num_possible_cpus())
+               vmid_bits = 0;
+}
+
+/*
+ * Return the number of VMID bits in use, as set by
+ * kvm_riscv_stage2_vmid_detect(); 0 means VMIDs are not used.
+ */
+unsigned long kvm_riscv_stage2_vmid_bits(void)
+{
+       return vmid_bits;
+}
+
+/*
+ * Initialise the per-VM VMID state of @kvm.
+ *
+ * Always returns 0.
+ */
+int kvm_riscv_stage2_vmid_init(struct kvm *kvm)
+{
+       struct kvm_vmid *vmid = &kvm->arch.vmid;
+
+       /*
+        * Start with an invalid VMID and VMID version (the global VMID
+        * version starts at 1).
+        */
+       vmid->vmid = 0;
+       vmid->vmid_version = 0;
+
+       return 0;
+}
+
+/*
+ * Cross-CPU call callback: issue hfence.gvma for all guest TLB entries
+ * on the CPU it runs on. @info is unused.
+ */
+static void local_guest_tlb_flush_all(void *info)
+{
+       __kvm_riscv_hfence_gvma_all();
+}
+
+/*
+ * Report whether @vmid was assigned under a VMID version other than the
+ * current global one. Always false when VMIDs are not in use (vmid_bits
+ * is 0). Both versions are sampled with READ_ONCE(); no lock is taken.
+ */
+bool kvm_riscv_stage2_vmid_ver_changed(struct kvm_vmid *vmid)
+{
+       unsigned long vm_version, cur_version;
+
+       if (vmid_bits == 0)
+               return false;
+
+       vm_version = READ_ONCE(vmid->vmid_version);
+       cur_version = READ_ONCE(vmid_version);
+
+       return unlikely(vm_version != cur_version);
+}
+
+/*
+ * Make sure the VM that @vcpu belongs to holds a VMID of the current
+ * VMID version, allocating a new one if it does not.
+ *
+ * Nothing is done when VMIDs are not in use or the VM's VMID is already
+ * current. Otherwise a new VMID is assigned with vmid_lock held and
+ * KVM_REQ_UPDATE_HGATP is raised on every VCPU of the VM.
+ */
+void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu)
+{
+       int i;
+       struct kvm_vcpu *v;
+       struct kvm_vmid *vmid = &vcpu->kvm->arch.vmid;
+
+       if (!kvm_riscv_stage2_vmid_ver_changed(vmid))
+               return;
+
+       spin_lock(&vmid_lock);
+
+       /*
+        * Re-check the vmid_version with the lock held: another VCPU of
+        * this VM may already have allocated a valid VMID for it.
+        */
+       if (!kvm_riscv_stage2_vmid_ver_changed(vmid)) {
+               spin_unlock(&vmid_lock);
+               return;
+       }
+
+       /* First user of a new VMID version? */
+       if (unlikely(vmid_next == 0)) {
+               WRITE_ONCE(vmid_version, READ_ONCE(vmid_version) + 1);
+               vmid_next = 1;
+
+               /*
+                * We ran out of VMIDs so we increment vmid_version and
+                * start assigning VMIDs from 1.
+                *
+                * This also means the existing VMID assignment of all Guest
+                * instances is invalid and we have to force VMID
+                * re-assignment for all Guest instances. The Guest instances
+                * that were not running will automatically pick up new VMIDs
+                * because they will call kvm_riscv_stage2_vmid_update()
+                * whenever they enter the in-kernel run loop. For Guest
+                * instances that are already running, we force VM exits on
+                * all host CPUs using IPI and flush all Guest TLBs.
+                *
+                * on_each_cpu_mask() is used instead of
+                * smp_call_function_many() because the latter skips the
+                * calling CPU, whose Guest TLB must be flushed as well.
+                */
+               on_each_cpu_mask(cpu_online_mask,
+                                local_guest_tlb_flush_all, NULL, true);
+       }
+
+       vmid->vmid = vmid_next;
+       vmid_next++;
+       /* Wrap to 0 once all VMIDs are used; 0 starts a new version above */
+       vmid_next &= (1UL << vmid_bits) - 1;
+
+       WRITE_ONCE(vmid->vmid_version, READ_ONCE(vmid_version));
+
+       spin_unlock(&vmid_lock);
+
+       /* Request stage2 page table update for all VCPUs */
+       kvm_for_each_vcpu(i, v, vcpu->kvm)
+               kvm_make_request(KVM_REQ_UPDATE_HGATP, v);
+}
-- 
2.17.1

Reply via email to