[PATCH] KVM: arm64: Fix unaligned addr case in mmu walking

2021-03-02 Thread Jia He
If the start addr is not aligned with the granule size of that level,
the loop step size should be adjusted to the next granule boundary
instead of a simple kvm_granule_size(level) increment. Otherwise, some
mmu entries might be skipped and never walked through.
E.g. Assume the unmap range [data->addr, data->end] is
[0xff00ab2000, 0xff00cb2000] in a level 2 walk and NOT a block mapping.
The first pmd entry only covers the sub-range [0xff00ab2000, 0xff00c00000),
and its pmd value is 0x83fbd2c1002 (not a valid entry). In this case,
data->addr should be adjusted to 0xff00c00000 instead of 0xff00cb2000.
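
For illustration, the intended stepping logic is sketched below (assuming
a 4K-page configuration, where the level-2 granule is 2MiB; ALIGN_DOWN is
the generic kernel macro):

	/*
	 * With data->addr = 0xff00ab2000 and a 2MiB granule, the entry
	 * being visited starts at 0xff00a00000, so the next entry to
	 * visit starts at 0xff00c00000 -- not at 0xff00cb2000, which a
	 * plain "+= granule" would produce.
	 */
	u64 granule = kvm_granule_size(level);	/* 2MiB at level 2 */
	u64 next = ALIGN_DOWN(data->addr, granule) + granule;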

Without this fix, a userspace "segmentation fault" error can easily be
triggered by running simple gVisor runsc cases on an Ampere Altra
server:
docker run --runtime=runsc -it --rm  ubuntu /bin/bash

In container:
for i in `seq 1 100`;do ls;done

Reported-by: Howard Zhang 
Signed-off-by: Jia He 
---
 arch/arm64/kvm/hyp/pgtable.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index bdf8e55ed308..4d99d07c610c 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -225,6 +225,7 @@ static inline int __kvm_pgtable_visit(struct 
kvm_pgtable_walk_data *data,
goto out;
 
if (!table) {
+   data->addr = ALIGN_DOWN(data->addr, kvm_granule_size(level));
data->addr += kvm_granule_size(level);
goto out;
}
-- 
2.17.1



[PATCH v3 30/32] KVM: arm64: Page-align the .hyp sections

2021-03-02 Thread Quentin Perret
We will soon unmap the .hyp sections from the host stage 2 in Protected
nVHE mode, which obviously works with at least page granularity, so make
sure to align them correctly.
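
For illustration, unmapping a section from the host stage 2 will operate
on whole pages of its physical footprint, roughly as sketched below
(host_stage2_unmap() is a hypothetical helper, not part of this patch);
without page-aligned section boundaries, the rounding would also drop
mappings of unrelated data sharing the first or last page:

	phys_addr_t start = ALIGN_DOWN(__pa_symbol(__hyp_rodata_start), PAGE_SIZE);
	phys_addr_t end   = ALIGN(__pa_symbol(__hyp_rodata_end), PAGE_SIZE);

	host_stage2_unmap(start, end);	/* hypothetical helper */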

Signed-off-by: Quentin Perret 
---
 arch/arm64/kernel/vmlinux.lds.S | 22 +-
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index e96173ce211b..709d2c433c5e 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -15,9 +15,11 @@
 
 #define HYPERVISOR_DATA_SECTIONS   \
HYP_SECTION_NAME(.rodata) : {   \
+   . = ALIGN(PAGE_SIZE);   \
__hyp_rodata_start = .; \
*(HYP_SECTION_NAME(.data..ro_after_init))   \
*(HYP_SECTION_NAME(.rodata))\
+   . = ALIGN(PAGE_SIZE);   \
__hyp_rodata_end = .;   \
}
 
@@ -72,21 +74,14 @@ ENTRY(_text)
 jiffies = jiffies_64;
 
 #define HYPERVISOR_TEXT\
-   /*  \
-* Align to 4 KB so that\
-* a) the HYP vector table is at its minimum\
-*alignment of 2048 bytes   \
-* b) the HYP init code will not cross a page   \
-*boundary if its size does not exceed  \
-*4 KB (see related ASSERT() below) \
-*/ \
-   . = ALIGN(SZ_4K);   \
+   . = ALIGN(PAGE_SIZE);   \
__hyp_idmap_text_start = .; \
*(.hyp.idmap.text)  \
__hyp_idmap_text_end = .;   \
__hyp_text_start = .;   \
*(.hyp.text)\
HYPERVISOR_EXTABLE  \
+   . = ALIGN(PAGE_SIZE);   \
__hyp_text_end = .;
 
 #define IDMAP_TEXT \
@@ -322,11 +317,12 @@ SECTIONS
 #include "image-vars.h"
 
 /*
- * The HYP init code and ID map text can't be longer than a page each,
- * and should not cross a page boundary.
+ * The HYP init code and ID map text can't be longer than a page each. The
+ * former is page-aligned, but the latter may not be with 16K or 64K pages, so
+ * it should also not cross a page boundary.
  */
-ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K,
-   "HYP init code too big or misaligned")
+ASSERT(__hyp_idmap_text_end - __hyp_idmap_text_start <= PAGE_SIZE,
+   "HYP init code too big")
 ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K,
"ID map text too big or misaligned")
 #ifdef CONFIG_HIBERNATION
-- 
2.30.1.766.gb4fecdf3b7-goog



[PATCH v3 26/32] KVM: arm64: Introduce PROT_NONE mappings for stage 2

2021-03-02 Thread Quentin Perret
Once we start unmapping portions of memory from the host stage 2 (such
as the hypervisor memory sections, or pages that belong to
protected guests), we will need a way to track page ownership. And
given that all mappings in the host stage 2 will be identity-mapped, we
can use the host stage 2 page-table itself as a simplistic rmap.

As a first step towards this, introduce a new protection attribute
in the stage 2 page table code, called KVM_PGTABLE_PROT_NONE, which
allows annotating portions of the IPA space as inaccessible. For
simplicity, PROT_NONE mappings are created as invalid mappings with a
software bit set.
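
For illustration, such an annotation could be recognised as sketched
below; this mirrors (and is slightly stricter than) the
kvm_pte_prot_none() helper added by the patch, which only checks the
software bit:

	#define KVM_PTE_VALID			BIT(0)
	#define KVM_PTE_LEAF_SW_BIT_PROT_NONE	BIT(55)

	/* A PROT_NONE entry is an invalid descriptor carrying the SW bit. */
	static bool pte_is_prot_none(kvm_pte_t pte)
	{
		return !(pte & KVM_PTE_VALID) &&
		       (pte & KVM_PTE_LEAF_SW_BIT_PROT_NONE);
	}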

Signed-off-by: Quentin Perret 
---
 arch/arm64/include/asm/kvm_pgtable.h |  2 ++
 arch/arm64/kvm/hyp/pgtable.c | 26 --
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_pgtable.h 
b/arch/arm64/include/asm/kvm_pgtable.h
index 9935dbae2cc1..c9f6ed76e0ad 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -80,6 +80,7 @@ struct kvm_pgtable {
  * @KVM_PGTABLE_PROT_W:Write permission.
  * @KVM_PGTABLE_PROT_R:Read permission.
  * @KVM_PGTABLE_PROT_DEVICE:   Device attributes.
+ * @KVM_PGTABLE_PROT_NONE: No permission.
  */
 enum kvm_pgtable_prot {
KVM_PGTABLE_PROT_X  = BIT(0),
@@ -87,6 +88,7 @@ enum kvm_pgtable_prot {
KVM_PGTABLE_PROT_R  = BIT(2),
 
KVM_PGTABLE_PROT_DEVICE = BIT(3),
+   KVM_PGTABLE_PROT_NONE   = BIT(4),
 };
 
 #define PAGE_HYP   (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W)
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index bdd6e3d4eeb6..8e7059fcfd40 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -48,6 +48,8 @@
 KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
 KVM_PTE_LEAF_ATTR_HI_S2_XN)
 
+#define KVM_PTE_LEAF_SW_BIT_PROT_NONE  BIT(55)
+
 struct kvm_pgtable_walk_data {
struct kvm_pgtable  *pgt;
struct kvm_pgtable_walker   *walker;
@@ -120,6 +122,16 @@ static bool kvm_pte_valid(kvm_pte_t pte)
return pte & KVM_PTE_VALID;
 }
 
+static bool kvm_pte_prot_none(kvm_pte_t pte)
+{
+   return pte & KVM_PTE_LEAF_SW_BIT_PROT_NONE;
+}
+
+static inline bool stage2_is_permanent_mapping(kvm_pte_t pte)
+{
+   return kvm_pte_prot_none(pte);
+}
+
 static bool kvm_pte_table(kvm_pte_t pte, u32 level)
 {
if (level == KVM_PGTABLE_MAX_LEVELS - 1)
@@ -182,7 +194,8 @@ static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t 
attr, u32 level)
 
pte |= attr & (KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI);
pte |= FIELD_PREP(KVM_PTE_TYPE, type);
-   pte |= KVM_PTE_VALID;
+   if (!kvm_pte_prot_none(pte))
+   pte |= KVM_PTE_VALID;
 
return pte;
 }
@@ -317,7 +330,7 @@ static int hyp_map_set_prot_attr(enum kvm_pgtable_prot prot,
u32 ap = (prot & KVM_PGTABLE_PROT_W) ? KVM_PTE_LEAF_ATTR_LO_S1_AP_RW :
   KVM_PTE_LEAF_ATTR_LO_S1_AP_RO;
 
-   if (!(prot & KVM_PGTABLE_PROT_R))
+   if (!(prot & KVM_PGTABLE_PROT_R) || (prot & KVM_PGTABLE_PROT_NONE))
return -EINVAL;
 
if (prot & KVM_PGTABLE_PROT_X) {
@@ -489,6 +502,13 @@ static int stage2_map_set_prot_attr(enum kvm_pgtable_prot 
prot,
PAGE_S2_MEMATTR(NORMAL);
u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS;
 
+   if (prot & KVM_PGTABLE_PROT_NONE) {
+   if (prot != KVM_PGTABLE_PROT_NONE)
+   return -EINVAL;
+   attr |= KVM_PTE_LEAF_SW_BIT_PROT_NONE;
+   goto out;
+   }
+
if (!(prot & KVM_PGTABLE_PROT_X))
attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
else if (device)
@@ -502,6 +522,8 @@ static int stage2_map_set_prot_attr(enum kvm_pgtable_prot 
prot,
 
attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
+
+out:
data->attr = attr;
return 0;
 }
-- 
2.30.1.766.gb4fecdf3b7-goog



[PATCH v3 27/32] KVM: arm64: Refactor stage2_map_set_prot_attr()

2021-03-02 Thread Quentin Perret
In order to ease its re-use in other code paths, refactor
stage2_map_set_prot_attr() to not depend on a stage2_map_data struct.
No functional change intended.

Signed-off-by: Quentin Perret 
---
 arch/arm64/kvm/hyp/pgtable.c | 19 ---
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 8e7059fcfd40..8aa01a9e2603 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -494,8 +494,7 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
return vtcr;
 }
 
-static int stage2_map_set_prot_attr(enum kvm_pgtable_prot prot,
-   struct stage2_map_data *data)
+static kvm_pte_t stage2_get_prot_attr(enum kvm_pgtable_prot prot)
 {
bool device = prot & KVM_PGTABLE_PROT_DEVICE;
kvm_pte_t attr = device ? PAGE_S2_MEMATTR(DEVICE_nGnRE) :
@@ -504,15 +503,15 @@ static int stage2_map_set_prot_attr(enum kvm_pgtable_prot 
prot,
 
if (prot & KVM_PGTABLE_PROT_NONE) {
if (prot != KVM_PGTABLE_PROT_NONE)
-   return -EINVAL;
+   return 0;
attr |= KVM_PTE_LEAF_SW_BIT_PROT_NONE;
-   goto out;
+   return attr;
}
 
if (!(prot & KVM_PGTABLE_PROT_X))
attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
else if (device)
-   return -EINVAL;
+   return 0;
 
if (prot & KVM_PGTABLE_PROT_R)
attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;
@@ -523,9 +522,7 @@ static int stage2_map_set_prot_attr(enum kvm_pgtable_prot 
prot,
attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
 
-out:
-   data->attr = attr;
-   return 0;
+   return attr;
 }
 
 static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
@@ -708,9 +705,9 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 
addr, u64 size,
.arg= &map_data,
};
 
-   ret = stage2_map_set_prot_attr(prot, &map_data);
-   if (ret)
-   return ret;
+   map_data.attr = stage2_get_prot_attr(prot);
+   if (!map_data.attr)
+   return -EINVAL;
 
ret = kvm_pgtable_walk(pgt, addr, size, &walker);
dsb(ishst);
-- 
2.30.1.766.gb4fecdf3b7-goog



[PATCH v3 24/32] KVM: arm64: Reserve memory for host stage 2

2021-03-02 Thread Quentin Perret
Extend the memory pool allocated for the hypervisor to include enough
pages to map all of memory at page granularity for the host stage 2.
While at it, also reserve some memory for device mappings.
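
For illustration, the worst-case sizing done by __hyp_pgtable_max_pages()
presumably amounts to the following (a sketch assuming 4K pages, i.e. 512
entries per table); the extra 16 pages added by
host_s2_mem_pgtable_pages() presumably give headroom for the (possibly
concatenated) stage 2 PGD:

	/* One table page is needed per 512 mapped entries, at each level. */
	static unsigned long pgtable_pages_needed(unsigned long nr_pages)
	{
		unsigned long total = 0;
		int level;

		for (level = 0; level < 4; level++) {	/* KVM_PGTABLE_MAX_LEVELS */
			nr_pages = DIV_ROUND_UP(nr_pages, 512);
			total += nr_pages;
		}
		return total;
	}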

Signed-off-by: Quentin Perret 
---
 arch/arm64/kvm/hyp/include/nvhe/mm.h | 23 ++-
 arch/arm64/kvm/hyp/nvhe/setup.c  | 12 
 arch/arm64/kvm/hyp/reserved_mem.c|  2 ++
 3 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h 
b/arch/arm64/kvm/hyp/include/nvhe/mm.h
index ac0f7fcffd08..411a35db949c 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h
@@ -53,7 +53,7 @@ static inline unsigned long __hyp_pgtable_max_pages(unsigned 
long nr_pages)
return total;
 }
 
-static inline unsigned long hyp_s1_pgtable_pages(void)
+static inline unsigned long __hyp_pgtable_total_pages(void)
 {
unsigned long res = 0, i;
 
@@ -63,9 +63,30 @@ static inline unsigned long hyp_s1_pgtable_pages(void)
res += __hyp_pgtable_max_pages(reg->size >> PAGE_SHIFT);
}
 
+   return res;
+}
+
+static inline unsigned long hyp_s1_pgtable_pages(void)
+{
+   unsigned long res;
+
+   res = __hyp_pgtable_total_pages();
+
/* Allow 1 GiB for private mappings */
res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
 
return res;
 }
+
+static inline unsigned long host_s2_mem_pgtable_pages(void)
+{
+   return __hyp_pgtable_total_pages() + 16;
+}
+
+static inline unsigned long host_s2_dev_pgtable_pages(void)
+{
+   /* Allow 1 GiB for private mappings */
+   return __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
+}
+
 #endif /* __KVM_HYP_MM_H */
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 178ec06f2b49..7e923b25271c 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -24,6 +24,8 @@ unsigned long hyp_nr_cpus;
 
 static void *vmemmap_base;
 static void *hyp_pgt_base;
+static void *host_s2_mem_pgt_base;
+static void *host_s2_dev_pgt_base;
 
 static int divide_memory_pool(void *virt, unsigned long size)
 {
@@ -42,6 +44,16 @@ static int divide_memory_pool(void *virt, unsigned long size)
if (!hyp_pgt_base)
return -ENOMEM;
 
+   nr_pages = host_s2_mem_pgtable_pages();
+   host_s2_mem_pgt_base = hyp_early_alloc_contig(nr_pages);
+   if (!host_s2_mem_pgt_base)
+   return -ENOMEM;
+
+   nr_pages = host_s2_dev_pgtable_pages();
+   host_s2_dev_pgt_base = hyp_early_alloc_contig(nr_pages);
+   if (!host_s2_dev_pgt_base)
+   return -ENOMEM;
+
return 0;
 }
 
diff --git a/arch/arm64/kvm/hyp/reserved_mem.c 
b/arch/arm64/kvm/hyp/reserved_mem.c
index 9bc6a6d27904..fd42705a3c26 100644
--- a/arch/arm64/kvm/hyp/reserved_mem.c
+++ b/arch/arm64/kvm/hyp/reserved_mem.c
@@ -52,6 +52,8 @@ void __init kvm_hyp_reserve(void)
}
 
hyp_mem_pages += hyp_s1_pgtable_pages();
+   hyp_mem_pages += host_s2_mem_pgtable_pages();
+   hyp_mem_pages += host_s2_dev_pgtable_pages();
 
/*
 * The hyp_vmemmap needs to be backed by pages, but these pages
-- 
2.30.1.766.gb4fecdf3b7-goog



[PATCH v3 31/32] KVM: arm64: Disable PMU support in protected mode

2021-03-02 Thread Quentin Perret
The host currently writes directly into the EL2 per-CPU data sections from
the PMU code when running in nVHE. In preparation for unmapping the EL2
sections from the host stage 2, disable PMU support in protected mode as
we currently do not have a use-case for it.

Signed-off-by: Quentin Perret 
---
 arch/arm64/kvm/perf.c | 3 ++-
 arch/arm64/kvm/pmu.c  | 8 
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/kvm/perf.c b/arch/arm64/kvm/perf.c
index 739164324afe..8f860ae56bb7 100644
--- a/arch/arm64/kvm/perf.c
+++ b/arch/arm64/kvm/perf.c
@@ -55,7 +55,8 @@ int kvm_perf_init(void)
 * hardware performance counters. This could ensure the presence of
 * a physical PMU and CONFIG_PERF_EVENT is selected.
 */
-   if (IS_ENABLED(CONFIG_ARM_PMU) && perf_num_counters() > 0)
+   if (IS_ENABLED(CONFIG_ARM_PMU) && perf_num_counters() > 0
+  && !is_protected_kvm_enabled())
static_branch_enable(&kvm_arm_pmu_available);
 
return perf_register_guest_info_callbacks(&kvm_guest_cbs);
diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c
index faf32a44ba04..03a6c1f4a09a 100644
--- a/arch/arm64/kvm/pmu.c
+++ b/arch/arm64/kvm/pmu.c
@@ -33,7 +33,7 @@ void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr)
 {
struct kvm_host_data *ctx = this_cpu_ptr_hyp_sym(kvm_host_data);
 
-   if (!ctx || !kvm_pmu_switch_needed(attr))
+   if (!kvm_arm_support_pmu_v3() || !ctx || !kvm_pmu_switch_needed(attr))
return;
 
if (!attr->exclude_host)
@@ -49,7 +49,7 @@ void kvm_clr_pmu_events(u32 clr)
 {
struct kvm_host_data *ctx = this_cpu_ptr_hyp_sym(kvm_host_data);
 
-   if (!ctx)
+   if (!kvm_arm_support_pmu_v3() || !ctx)
return;
 
ctx->pmu_events.events_host &= ~clr;
@@ -172,7 +172,7 @@ void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu)
struct kvm_host_data *host;
u32 events_guest, events_host;
 
-   if (!has_vhe())
+   if (!kvm_arm_support_pmu_v3() || !has_vhe())
return;
 
preempt_disable();
@@ -193,7 +193,7 @@ void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu)
struct kvm_host_data *host;
u32 events_guest, events_host;
 
-   if (!has_vhe())
+   if (!kvm_arm_support_pmu_v3() || !has_vhe())
return;
 
host = this_cpu_ptr_hyp_sym(kvm_host_data);
-- 
2.30.1.766.gb4fecdf3b7-goog



[PATCH v3 32/32] KVM: arm64: Protect the .hyp sections from the host

2021-03-02 Thread Quentin Perret
When KVM runs in nVHE protected mode, use the host stage 2 to unmap the
hypervisor sections. The long-term goal is to ensure the EL2 code can
remain robust regardless of the host's state, so this starts by making
sure the host cannot e.g. write to the .hyp sections directly.
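
The EL2 side of the new __pkvm_host_unmap hypercall is only partially
visible below (the mem_protect.c hunk is truncated); conceptually it only
needs to replace the identity mapping of the range with PROT_NONE
annotations while holding the host stage-2 lock. A hedged sketch, with
host_s2_mem_pool standing in for whatever pool the real code allocates
from:

	int __pkvm_host_unmap(phys_addr_t start, phys_addr_t end)
	{
		int ret;

		hyp_spin_lock(&host_kvm.lock);
		ret = kvm_pgtable_stage2_map(&host_kvm.pgt, start, end - start,
					     start, KVM_PGTABLE_PROT_NONE,
					     &host_s2_mem_pool);
		hyp_spin_unlock(&host_kvm.lock);

		return ret;
	}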

Signed-off-by: Quentin Perret 
---
 arch/arm64/include/asm/kvm_asm.h  |  1 +
 arch/arm64/kvm/arm.c  | 46 +++
 arch/arm64/kvm/hyp/include/nvhe/mem_protect.h |  2 +
 arch/arm64/kvm/hyp/nvhe/hyp-main.c|  9 
 arch/arm64/kvm/hyp/nvhe/mem_protect.c | 22 +
 5 files changed, 80 insertions(+)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index b127af02bd45..9accf5350858 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -62,6 +62,7 @@
 #define __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping17
 #define __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector18
 #define __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize 19
+#define __KVM_HOST_SMCCC_FUNC___pkvm_host_unmap20
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index a31c56bc55b3..73c26d206542 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1894,11 +1894,57 @@ void _kvm_host_prot_finalize(void *discard)
WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize));
 }
 
+static inline int pkvm_host_unmap(phys_addr_t start, phys_addr_t end)
+{
+   return kvm_call_hyp_nvhe(__pkvm_host_unmap, start, end);
+}
+
+#define pkvm_host_unmap_section(__section) \
+   pkvm_host_unmap(__pa_symbol(__section##_start), \
+   __pa_symbol(__section##_end))
+
 static int finalize_hyp_mode(void)
 {
+   int cpu, ret;
+
if (!is_protected_kvm_enabled())
return 0;
 
+   ret = pkvm_host_unmap_section(__hyp_idmap_text);
+   if (ret)
+   return ret;
+
+   ret = pkvm_host_unmap_section(__hyp_text);
+   if (ret)
+   return ret;
+
+   ret = pkvm_host_unmap_section(__hyp_rodata);
+   if (ret)
+   return ret;
+
+   ret = pkvm_host_unmap_section(__hyp_bss);
+   if (ret)
+   return ret;
+
+   ret = pkvm_host_unmap(hyp_mem_base, hyp_mem_base + hyp_mem_size);
+   if (ret)
+   return ret;
+
+   for_each_possible_cpu(cpu) {
+   phys_addr_t start = virt_to_phys((void 
*)kvm_arm_hyp_percpu_base[cpu]);
+   phys_addr_t end = start + (PAGE_SIZE << nvhe_percpu_order());
+
+   ret = pkvm_host_unmap(start, end);
+   if (ret)
+   return ret;
+
+   start = virt_to_phys((void *)per_cpu(kvm_arm_hyp_stack_page, 
cpu));
+   end = start + PAGE_SIZE;
+   ret = pkvm_host_unmap(start, end);
+   if (ret)
+   return ret;
+   }
+
/*
 * Flip the static key upfront as that may no longer be possible
 * once the host stage 2 is installed.
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h 
b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index d293cb328cc4..39890d4f1dc8 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -21,6 +21,8 @@ struct host_kvm {
 extern struct host_kvm host_kvm;
 
 int __pkvm_prot_finalize(void);
+int __pkvm_host_unmap(phys_addr_t start, phys_addr_t end);
+
 int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool);
 void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt);
 
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c 
b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index f47028d3fd0a..2069136fdaec 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -156,6 +156,14 @@ static void handle___pkvm_prot_finalize(struct 
kvm_cpu_context *host_ctxt)
 {
cpu_reg(host_ctxt, 1) = __pkvm_prot_finalize();
 }
+
+static void handle___pkvm_host_unmap(struct kvm_cpu_context *host_ctxt)
+{
+   DECLARE_REG(phys_addr_t, start, host_ctxt, 1);
+   DECLARE_REG(phys_addr_t, end, host_ctxt, 2);
+
+   cpu_reg(host_ctxt, 1) = __pkvm_host_unmap(start, end);
+}
 typedef void (*hcall_t)(struct kvm_cpu_context *);
 
 #define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x
@@ -180,6 +188,7 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__pkvm_create_mappings),
HANDLE_FUNC(__pkvm_create_private_mapping),
HANDLE_FUNC(__pkvm_prot_finalize),
+   HANDLE_FUNC(__pkvm_host_unmap),
 };
 
 static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c 
b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 2252ad1a8945..ed480facdc88 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -196,6 +196,28 @@ static int host_stage2_idma

[PATCH v3 29/32] KVM: arm64: Wrap the host with a stage 2

2021-03-02 Thread Quentin Perret
When KVM runs in protected nVHE mode, make use of a stage 2 page-table
to give the hypervisor some control over the host memory accesses. The
host stage 2 is created lazily using large block mappings if possible,
and will default to page mappings in the absence of a better solution.

From this point on, memory accesses from the host to protected memory
regions (e.g. marked PROT_NONE) are fatal and lead to hyp_panic().
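
A hedged sketch of the fatal-abort logic described above (the
mem_protect.c implementation is only partially shown below;
host_fault_pa(), host_memblock_range() and host_s2_pool are illustrative
names, not taken from the patch):

	static void host_mem_abort_sketch(struct kvm_cpu_context *host_ctxt)
	{
		phys_addr_t fault_pa = host_fault_pa(host_ctxt);
		struct kvm_mem_range range;
		int ret;

		host_memblock_range(fault_pa, &range);

		/*
		 * Lazily idmap the faulting address with the largest block
		 * that does not conflict with existing mappings (e.g. the
		 * invalid PROT_NONE annotations covering protected pages).
		 */
		ret = kvm_pgtable_stage2_idmap_greedy(&host_kvm.pgt, fault_pa,
						      KVM_PGTABLE_PROT_R |
						      KVM_PGTABLE_PROT_W |
						      KVM_PGTABLE_PROT_X,
						      &range, &host_s2_pool);
		if (ret)
			hyp_panic();	/* host touched protected memory */
	}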

Signed-off-by: Quentin Perret 
---
 arch/arm64/include/asm/kvm_asm.h  |   1 +
 arch/arm64/include/asm/kvm_cpufeature.h   |   2 +
 arch/arm64/kernel/image-vars.h|   3 +
 arch/arm64/kvm/arm.c  |  10 +
 arch/arm64/kvm/hyp/include/nvhe/mem_protect.h |  34 +++
 arch/arm64/kvm/hyp/nvhe/Makefile  |   2 +-
 arch/arm64/kvm/hyp/nvhe/hyp-init.S|   1 +
 arch/arm64/kvm/hyp/nvhe/hyp-main.c|  11 +
 arch/arm64/kvm/hyp/nvhe/mem_protect.c | 213 ++
 arch/arm64/kvm/hyp/nvhe/setup.c   |   5 +
 arch/arm64/kvm/hyp/nvhe/switch.c  |   7 +-
 arch/arm64/kvm/hyp/nvhe/tlb.c |   4 +-
 12 files changed, 286 insertions(+), 7 deletions(-)
 create mode 100644 arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
 create mode 100644 arch/arm64/kvm/hyp/nvhe/mem_protect.c

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 6dce860f8bca..b127af02bd45 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -61,6 +61,7 @@
 #define __KVM_HOST_SMCCC_FUNC___pkvm_create_mappings   16
 #define __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping17
 #define __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector18
+#define __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize 19
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm64/include/asm/kvm_cpufeature.h 
b/arch/arm64/include/asm/kvm_cpufeature.h
index d34f85cba358..74043a149322 100644
--- a/arch/arm64/include/asm/kvm_cpufeature.h
+++ b/arch/arm64/include/asm/kvm_cpufeature.h
@@ -15,3 +15,5 @@
 #endif
 
 KVM_HYP_CPU_FTR_REG(SYS_CTR_EL0, arm64_ftr_reg_ctrel0)
+KVM_HYP_CPU_FTR_REG(SYS_ID_AA64MMFR0_EL1, arm64_ftr_reg_id_aa64mmfr0_el1)
+KVM_HYP_CPU_FTR_REG(SYS_ID_AA64MMFR1_EL1, arm64_ftr_reg_id_aa64mmfr1_el1)
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index 940c378fa837..d5dc2b792651 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -131,6 +131,9 @@ KVM_NVHE_ALIAS(__hyp_bss_end);
 KVM_NVHE_ALIAS(__hyp_rodata_start);
 KVM_NVHE_ALIAS(__hyp_rodata_end);
 
+/* pKVM static key */
+KVM_NVHE_ALIAS(kvm_protected_mode_initialized);
+
 #endif /* CONFIG_KVM */
 
 #endif /* __ARM64_KERNEL_IMAGE_VARS_H */
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index b6a818f88051..a31c56bc55b3 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1889,12 +1889,22 @@ static int init_hyp_mode(void)
return err;
 }
 
+void _kvm_host_prot_finalize(void *discard)
+{
+   WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize));
+}
+
 static int finalize_hyp_mode(void)
 {
if (!is_protected_kvm_enabled())
return 0;
 
+   /*
+* Flip the static key upfront as that may no longer be possible
+* once the host stage 2 is installed.
+*/
static_branch_enable(&kvm_protected_mode_initialized);
+   on_each_cpu(_kvm_host_prot_finalize, NULL, 1);
 
return 0;
 }
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h 
b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
new file mode 100644
index ..d293cb328cc4
--- /dev/null
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 Google LLC
+ * Author: Quentin Perret 
+ */
+
+#ifndef __KVM_NVHE_MEM_PROTECT__
+#define __KVM_NVHE_MEM_PROTECT__
+#include 
+#include 
+#include 
+#include 
+#include 
+
+struct host_kvm {
+   struct kvm_arch arch;
+   struct kvm_pgtable pgt;
+   struct kvm_pgtable_mm_ops mm_ops;
+   hyp_spinlock_t lock;
+};
+extern struct host_kvm host_kvm;
+
+int __pkvm_prot_finalize(void);
+int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool);
+void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt);
+
+static __always_inline void __load_host_stage2(void)
+{
+   if (static_branch_likely(&kvm_protected_mode_initialized))
+   __load_stage2(&host_kvm.arch.mmu, host_kvm.arch.vtcr);
+   else
+   write_sysreg(0, vttbr_el2);
+}
+#endif /* __KVM_NVHE_MEM_PROTECT__ */
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index e204ea77ab27..ce49795324a7 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -14,7 +14,7 @@ lib-objs := $(addprefix ../../../lib/, $(lib-objs))
 
 obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
 hyp-main.o hyp-s

[PATCH v3 25/32] KVM: arm64: Sort the hypervisor memblocks

2021-03-02 Thread Quentin Perret
We will soon need to check whether a physical address belongs to a
memblock at EL2, so sort the memblocks to make that lookup efficient.
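
For illustration, the kind of lookup that sorting enables at EL2, where
the generic memblock API is unavailable (a sketch; the real EL2 helper is
added later in the series and may differ):

	static bool pa_is_memory(struct memblock_region *regs, unsigned int nr,
				 phys_addr_t pa)
	{
		int lo = 0, hi = nr - 1;

		while (lo <= hi) {
			int mid = lo + (hi - lo) / 2;

			if (pa < regs[mid].base)
				hi = mid - 1;
			else if (pa >= regs[mid].base + regs[mid].size)
				lo = mid + 1;
			else
				return true;	/* pa falls in regs[mid] */
		}

		return false;
	}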

Signed-off-by: Quentin Perret 
---
 arch/arm64/kvm/hyp/reserved_mem.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/arch/arm64/kvm/hyp/reserved_mem.c 
b/arch/arm64/kvm/hyp/reserved_mem.c
index fd42705a3c26..83ca23ac259b 100644
--- a/arch/arm64/kvm/hyp/reserved_mem.c
+++ b/arch/arm64/kvm/hyp/reserved_mem.c
@@ -6,6 +6,7 @@
 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -18,6 +19,23 @@ static unsigned int *hyp_memblock_nr_ptr = 
&kvm_nvhe_sym(hyp_memblock_nr);
 phys_addr_t hyp_mem_base;
 phys_addr_t hyp_mem_size;
 
+static int cmp_hyp_memblock(const void *p1, const void *p2)
+{
+   const struct memblock_region *r1 = p1;
+   const struct memblock_region *r2 = p2;
+
+   return r1->base < r2->base ? -1 : (r1->base > r2->base);
+}
+
+static void __init sort_memblock_regions(void)
+{
+   sort(hyp_memory,
+*hyp_memblock_nr_ptr,
+sizeof(struct memblock_region),
+cmp_hyp_memblock,
+NULL);
+}
+
 static int __init register_memblock_regions(void)
 {
struct memblock_region *reg;
@@ -29,6 +47,7 @@ static int __init register_memblock_regions(void)
hyp_memory[*hyp_memblock_nr_ptr] = *reg;
(*hyp_memblock_nr_ptr)++;
}
+   sort_memblock_regions();
 
return 0;
 }
-- 
2.30.1.766.gb4fecdf3b7-goog



[PATCH v3 23/32] KVM: arm64: Make memcache anonymous in pgtable allocator

2021-03-02 Thread Quentin Perret
The current stage2 page-table allocator uses a memcache to get
pre-allocated pages when it needs any. To allow re-using this code at
EL2, which uses memory pools instead, make the memcache argument of
kvm_pgtable_stage2_map() anonymous, and let the mm_ops zalloc_page()
callbacks use it the way they need to.
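
For illustration, the two environments can then interpret the opaque
argument differently, roughly as sketched below;
kvm_mmu_memory_cache_alloc() is the existing EL1 helper, while
hyp_alloc_page() stands in for the EL2 allocator used later in the
series:

	/* EL1: @arg is a struct kvm_mmu_memory_cache of pre-zeroed pages. */
	static void *stage2_memcache_zalloc_page(void *arg)
	{
		return kvm_mmu_memory_cache_alloc(arg);
	}

	/* EL2 (illustrative): @arg is a memory pool. */
	static void *host_s2_zalloc_page(void *arg)
	{
		return hyp_alloc_page(arg);
	}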

Signed-off-by: Quentin Perret 
---
 arch/arm64/include/asm/kvm_pgtable.h | 6 +++---
 arch/arm64/kvm/hyp/pgtable.c | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_pgtable.h 
b/arch/arm64/include/asm/kvm_pgtable.h
index 21e0985d2e00..9935dbae2cc1 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -213,8 +213,8 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
  * @size:  Size of the mapping.
  * @phys:  Physical address of the memory to map.
  * @prot:  Permissions and attributes for the mapping.
- * @mc:Cache of pre-allocated GFP_PGTABLE_USER memory from 
which to
- * allocate page-table pages.
+ * @mc:Cache of pre-allocated and zeroed memory from which to 
allocate
+ * page-table pages.
  *
  * The offset of @addr within a page is ignored, @size is rounded-up to
  * the next page boundary and @phys is rounded-down to the previous page
@@ -236,7 +236,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
  */
 int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
   u64 phys, enum kvm_pgtable_prot prot,
-  struct kvm_mmu_memory_cache *mc);
+  void *mc);
 
 /**
  * kvm_pgtable_stage2_unmap() - Remove a mapping from a guest stage-2 
page-table.
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 296675e5600d..bdd6e3d4eeb6 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -445,7 +445,7 @@ struct stage2_map_data {
kvm_pte_t   *anchor;
 
struct kvm_s2_mmu   *mmu;
-   struct kvm_mmu_memory_cache *memcache;
+   void*memcache;
 
struct kvm_pgtable_mm_ops   *mm_ops;
 };
@@ -669,7 +669,7 @@ static int stage2_map_walker(u64 addr, u64 end, u32 level, 
kvm_pte_t *ptep,
 
 int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
   u64 phys, enum kvm_pgtable_prot prot,
-  struct kvm_mmu_memory_cache *mc)
+  void *mc)
 {
int ret;
struct stage2_map_data map_data = {
-- 
2.30.1.766.gb4fecdf3b7-goog



[PATCH v3 28/32] KVM: arm64: Add kvm_pgtable_stage2_idmap_greedy()

2021-03-02 Thread Quentin Perret
Add a new map function to the KVM page-table library that makes it
possible to greedily create block identity-mappings. This will be useful
to lazily create the host stage 2 page-table, as it will own most of
memory and will always be identity mapped.

The new helper function creates the mapping in 2 steps: it first walks
the page-table to compute the largest possible granule that can be used
to idmap a given address without overriding existing incompatible
mappings; and then creates a mapping accordingly.
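
For illustration, the two steps roughly correspond to the sketch below
(stage2_reduce_range() is a stand-in name for the walker-based step added
by this patch):

	int idmap_greedy_sketch(struct kvm_pgtable *pgt, u64 addr,
				enum kvm_pgtable_prot prot,
				struct kvm_mem_range *range, void *mc)
	{
		int ret;

		/* Step 1: shrink @range to the largest granule that can idmap
		 * @addr without clobbering incompatible mappings. */
		ret = stage2_reduce_range(pgt, addr, prot, range);
		if (ret)
			return ret;

		/* Step 2: install the identity mapping over the reduced range. */
		return kvm_pgtable_stage2_map(pgt, range->start,
					      range->end - range->start,
					      range->start, prot, mc);
	}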

Signed-off-by: Quentin Perret 
---
 arch/arm64/include/asm/kvm_pgtable.h |  37 +
 arch/arm64/kvm/hyp/pgtable.c | 119 +++
 2 files changed, 156 insertions(+)

diff --git a/arch/arm64/include/asm/kvm_pgtable.h 
b/arch/arm64/include/asm/kvm_pgtable.h
index c9f6ed76e0ad..e51dcce69a5e 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -96,6 +96,16 @@ enum kvm_pgtable_prot {
 #define PAGE_HYP_RO(KVM_PGTABLE_PROT_R)
 #define PAGE_HYP_DEVICE(PAGE_HYP | KVM_PGTABLE_PROT_DEVICE)
 
+/**
+ * struct kvm_mem_range - Range of Intermediate Physical Addresses
+ * @start: Start of the range.
+ * @end:   End of the range.
+ */
+struct kvm_mem_range {
+   u64 start;
+   u64 end;
+};
+
 /**
  * enum kvm_pgtable_walk_flags - Flags to control a depth-first page-table 
walk.
  * @KVM_PGTABLE_WALK_LEAF: Visit leaf entries, including invalid
@@ -379,4 +389,31 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 
addr, u64 size);
 int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
 struct kvm_pgtable_walker *walker);
 
+/**
+ * kvm_pgtable_stage2_idmap_greedy() - Identity-map an Intermediate Physical
+ *Address with a leaf entry at the highest
+ *possible level.
+ * @pgt:   Page-table structure initialised by kvm_pgtable_*_init().
+ * @addr:  Input address to identity-map.
+ * @prot:  Permissions and attributes for the mapping.
+ * @range: Boundaries of the maximum memory region to map.
+ * @mc:Cache of pre-allocated memory from which to allocate 
page-table
+ * pages.
+ *
+ * This function attempts to install high-level identity-mappings covering 
@addr
+ * without overriding existing mappings with incompatible permissions or
+ * attributes. An existing table entry may be coalesced into a block mapping
+ * if and only if it covers @addr and all its leafs are either invalid and/or
+ * have permissions and attributes strictly matching @prot. The mapping is
+ * guaranteed to be contained within the boundaries specified by @range at call
+ * time. If only a subset of the memory specified by @range is mapped (because
+ * of e.g. alignment issues or existing incompatible mappings), @range will be
+ * updated accordingly.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kvm_pgtable_stage2_idmap_greedy(struct kvm_pgtable *pgt, u64 addr,
+   enum kvm_pgtable_prot prot,
+   struct kvm_mem_range *range,
+   void *mc);
 #endif /* __ARM64_KVM_PGTABLE_H__ */
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 8aa01a9e2603..6897d771e2b2 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -987,3 +987,122 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
pgt->mm_ops->free_pages_exact(pgt->pgd, pgd_sz);
pgt->pgd = NULL;
 }
+
+struct stage2_reduce_range_data {
+   kvm_pte_t attr;
+   u64 target_addr;
+   u32 start_level;
+   struct kvm_mem_range *range;
+};
+
+static int __stage2_reduce_range(struct stage2_reduce_range_data *data, u64 
addr)
+{
+   u32 level = data->start_level;
+
+   for (; level < KVM_PGTABLE_MAX_LEVELS; level++) {
+   u64 granule = kvm_granule_size(level);
+   u64 start = ALIGN_DOWN(data->target_addr, granule);
+   u64 end = start + granule;
+
+   /*
+* The pinned address is in the current range, try one level
+* deeper.
+*/
+   if (start == ALIGN_DOWN(addr, granule))
+   continue;
+
+   /*
+* Make sure the current range is a reduction of the existing
+* range before updating it.
+*/
+   if (data->range->start <= start && end <= data->range->end) {
+   data->start_level = level;
+   data->range->start = start;
+   data->range->end = end;
+   return 0;
+   }
+   }
+
+   return -EINVAL;
+}
+
+#define KVM_PTE_LEAF_S2_COMPAT_MASK(KVM_PTE_LEAF_ATTR_S2_PERMS | \
+KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR

[PATCH v3 20/32] KVM: arm64: Refactor kvm_arm_setup_stage2()

2021-03-02 Thread Quentin Perret
In order to re-use some of the stage 2 setup code at EL2, factor parts
of kvm_arm_setup_stage2() out into separate functions.

No functional change intended.
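
For illustration, kvm_arm_setup_stage2() presumably reduces to something
like the following once the VTCR construction has moved (the tail of the
reset.c hunk below is truncated):

	mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
	mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
	kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);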

Signed-off-by: Quentin Perret 
---
 arch/arm64/include/asm/kvm_pgtable.h | 26 +
 arch/arm64/kvm/hyp/pgtable.c | 32 +
 arch/arm64/kvm/reset.c   | 42 +++-
 3 files changed, 62 insertions(+), 38 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_pgtable.h 
b/arch/arm64/include/asm/kvm_pgtable.h
index a8255d55c168..21e0985d2e00 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -13,6 +13,16 @@
 
 #define KVM_PGTABLE_MAX_LEVELS 4U
 
+static inline u64 kvm_get_parange(u64 mmfr0)
+{
+   u64 parange = cpuid_feature_extract_unsigned_field(mmfr0,
+   ID_AA64MMFR0_PARANGE_SHIFT);
+   if (parange > ID_AA64MMFR0_PARANGE_MAX)
+   parange = ID_AA64MMFR0_PARANGE_MAX;
+
+   return parange;
+}
+
 typedef u64 kvm_pte_t;
 
 /**
@@ -159,6 +169,22 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt);
 int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
enum kvm_pgtable_prot prot);
 
+/**
+ * kvm_get_vtcr() - Helper to construct VTCR_EL2
+ * @mmfr0: Sanitized value of SYS_ID_AA64MMFR0_EL1 register.
+ * @mmfr1: Sanitized value of SYS_ID_AA64MMFR1_EL1 register.
+ * @phys_shift: Value to set in VTCR_EL2.T0SZ.
+ *
+ * The VTCR value is common across all the physical CPUs on the system.
+ * We use system wide sanitised values to fill in different fields,
+ * except for Hardware Management of Access Flags. HA Flag is set
+ * unconditionally on all CPUs, as it is safe to run with or without
+ * the feature and the bit is RES0 on CPUs that don't support it.
+ *
+ * Return: VTCR_EL2 value
+ */
+u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift);
+
 /**
  * kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table.
  * @pgt:   Uninitialised page-table structure to initialise.
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 3d79c8094cdd..296675e5600d 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -9,6 +9,7 @@
 
 #include 
 #include 
+#include 
 
 #define KVM_PTE_VALID  BIT(0)
 
@@ -449,6 +450,37 @@ struct stage2_map_data {
struct kvm_pgtable_mm_ops   *mm_ops;
 };
 
+u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
+{
+   u64 vtcr = VTCR_EL2_FLAGS;
+   u8 lvls;
+
+   vtcr |= kvm_get_parange(mmfr0) << VTCR_EL2_PS_SHIFT;
+   vtcr |= VTCR_EL2_T0SZ(phys_shift);
+   /*
+* Use a minimum 2 level page table to prevent splitting
+* host PMD huge pages at stage2.
+*/
+   lvls = stage2_pgtable_levels(phys_shift);
+   if (lvls < 2)
+   lvls = 2;
+   vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls);
+
+   /*
+* Enable the Hardware Access Flag management, unconditionally
+* on all CPUs. The feature is RES0 on CPUs without the support
+* and must be ignored by the CPUs.
+*/
+   vtcr |= VTCR_EL2_HA;
+
+   /* Set the vmid bits */
+   vtcr |= (get_vmid_bits(mmfr1) == 16) ?
+   VTCR_EL2_VS_16BIT :
+   VTCR_EL2_VS_8BIT;
+
+   return vtcr;
+}
+
 static int stage2_map_set_prot_attr(enum kvm_pgtable_prot prot,
struct stage2_map_data *data)
 {
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 47f3f035f3ea..6aae118c960a 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -332,19 +332,10 @@ int kvm_set_ipa_limit(void)
return 0;
 }
 
-/*
- * Configure the VTCR_EL2 for this VM. The VTCR value is common
- * across all the physical CPUs on the system. We use system wide
- * sanitised values to fill in different fields, except for Hardware
- * Management of Access Flags. HA Flag is set unconditionally on
- * all CPUs, as it is safe to run with or without the feature and
- * the bit is RES0 on CPUs that don't support it.
- */
 int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
 {
-   u64 vtcr = VTCR_EL2_FLAGS, mmfr0;
-   u32 parange, phys_shift;
-   u8 lvls;
+   u64 mmfr0, mmfr1;
+   u32 phys_shift;
 
if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
return -EINVAL;
@@ -359,33 +350,8 @@ int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long 
type)
}
 
mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
-   parange = cpuid_feature_extract_unsigned_field(mmfr0,
-   ID_AA64MMFR0_PARANGE_SHIFT);
-   if (parange > ID_AA64MMFR0_PARANGE_MAX)
-   parange = ID_AA64MMFR0_PARANGE_MAX;
-   vtcr |= parange << VTCR_EL2_PS_SHIFT;
-
-   vtcr |= VTCR_EL2_T0SZ(phys_shift);
-   /*
-* Use a minimum 2 level page table to prevent splitt

[PATCH v3 18/32] KVM: arm64: Use kvm_arch in kvm_s2_mmu

2021-03-02 Thread Quentin Perret
In order to make use of the stage 2 pgtable code for the host stage 2,
change kvm_s2_mmu to use a kvm_arch pointer in lieu of the kvm pointer,
as the host will have the former but not the latter.

Acked-by: Will Deacon 
Signed-off-by: Quentin Perret 
---
 arch/arm64/include/asm/kvm_host.h | 2 +-
 arch/arm64/include/asm/kvm_mmu.h  | 6 +-
 arch/arm64/kvm/mmu.c  | 8 
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h 
b/arch/arm64/include/asm/kvm_host.h
index b9d45a1f8538..90565782ce3e 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -94,7 +94,7 @@ struct kvm_s2_mmu {
/* The last vcpu id that ran on each physical CPU */
int __percpu *last_vcpu_ran;
 
-   struct kvm *kvm;
+   struct kvm_arch *arch;
 };
 
 struct kvm_arch_memory_slot {
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index ce02a4052dcf..6f743e20cb06 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -272,7 +272,7 @@ static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu 
*mmu)
  */
 static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu)
 {
-   write_sysreg(kern_hyp_va(mmu->kvm)->arch.vtcr, vtcr_el2);
+   write_sysreg(kern_hyp_va(mmu->arch)->vtcr, vtcr_el2);
write_sysreg(kvm_get_vttbr(mmu), vttbr_el2);
 
/*
@@ -283,5 +283,9 @@ static __always_inline void __load_guest_stage2(struct 
kvm_s2_mmu *mmu)
asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
 }
 
+static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu)
+{
+   return container_of(mmu->arch, struct kvm, arch);
+}
 #endif /* __ASSEMBLY__ */
 #endif /* __ARM64_KVM_MMU_H__ */
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 41f9c03cbcc3..3257cadfab24 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -165,7 +165,7 @@ static void *kvm_host_va(phys_addr_t phys)
 static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, 
u64 size,
 bool may_block)
 {
-   struct kvm *kvm = mmu->kvm;
+   struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
phys_addr_t end = start + size;
 
assert_spin_locked(&kvm->mmu_lock);
@@ -470,7 +470,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu 
*mmu)
for_each_possible_cpu(cpu)
*per_cpu_ptr(mmu->last_vcpu_ran, cpu) = -1;
 
-   mmu->kvm = kvm;
+   mmu->arch = &kvm->arch;
mmu->pgt = pgt;
mmu->pgd_phys = __pa(pgt->pgd);
mmu->vmid.vmid_gen = 0;
@@ -552,7 +552,7 @@ void stage2_unmap_vm(struct kvm *kvm)
 
 void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
 {
-   struct kvm *kvm = mmu->kvm;
+   struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
struct kvm_pgtable *pgt = NULL;
 
spin_lock(&kvm->mmu_lock);
@@ -621,7 +621,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t 
guest_ipa,
  */
 static void stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, 
phys_addr_t end)
 {
-   struct kvm *kvm = mmu->kvm;
+   struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
stage2_apply_range_resched(kvm, addr, end, 
kvm_pgtable_stage2_wrprotect);
 }
 
-- 
2.30.1.766.gb4fecdf3b7-goog



[PATCH v3 13/32] KVM: arm64: Enable access to sanitized CPU features at EL2

2021-03-02 Thread Quentin Perret
Introduce the infrastructure in KVM to copy CPU feature registers into
EL2-owned data structures, so that sanitised values can be read directly
at EL2 in nVHE.

Given that only a subset of these features are being read by the
hypervisor, the ones that need to be copied are to be listed under
<asm/kvm_cpufeature.h> together with the name of the nVHE variable that
will hold the copy.

While at it, introduce the first user of this infrastructure by
implementing __flush_dcache_area at EL2, which needs
arm64_ftr_reg_ctrel0.
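
For illustration, the host-side copy presumably boils down to the
following (the sys_regs.c hunk is not shown in full; kvm_nvhe_sym()
resolves the EL2-owned symbol declared through <asm/kvm_cpufeature.h>):

	void setup_kvm_el2_caps(void)
	{
		WARN_ON(copy_ftr_reg(SYS_CTR_EL0,
				     &kvm_nvhe_sym(arm64_ftr_reg_ctrel0)));
	}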

Signed-off-by: Quentin Perret 
---
 arch/arm64/include/asm/cpufeature.h |  1 +
 arch/arm64/include/asm/kvm_cpufeature.h | 17 +
 arch/arm64/include/asm/kvm_host.h   |  4 
 arch/arm64/kernel/cpufeature.c  | 13 +
 arch/arm64/kvm/hyp/nvhe/Makefile|  3 ++-
 arch/arm64/kvm/hyp/nvhe/cache.S | 13 +
 arch/arm64/kvm/hyp/nvhe/cpufeature.c|  8 
 arch/arm64/kvm/sys_regs.c   | 21 +
 8 files changed, 79 insertions(+), 1 deletion(-)
 create mode 100644 arch/arm64/include/asm/kvm_cpufeature.h
 create mode 100644 arch/arm64/kvm/hyp/nvhe/cache.S
 create mode 100644 arch/arm64/kvm/hyp/nvhe/cpufeature.c

diff --git a/arch/arm64/include/asm/cpufeature.h 
b/arch/arm64/include/asm/cpufeature.h
index 61177bac49fa..a85cea2cac57 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -607,6 +607,7 @@ void check_local_cpu_capabilities(void);
 
 u64 read_sanitised_ftr_reg(u32 id);
 u64 __read_sysreg_by_encoding(u32 sys_id);
+int copy_ftr_reg(u32 id, struct arm64_ftr_reg *dst);
 
 static inline bool cpu_supports_mixed_endian_el0(void)
 {
diff --git a/arch/arm64/include/asm/kvm_cpufeature.h 
b/arch/arm64/include/asm/kvm_cpufeature.h
new file mode 100644
index ..d34f85cba358
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_cpufeature.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 - Google LLC
+ * Author: Quentin Perret 
+ */
+
+#include 
+
+#ifndef KVM_HYP_CPU_FTR_REG
+#if defined(__KVM_NVHE_HYPERVISOR__)
+#define KVM_HYP_CPU_FTR_REG(id, name) extern struct arm64_ftr_reg name;
+#else
+#define KVM_HYP_CPU_FTR_REG(id, name) DECLARE_KVM_NVHE_SYM(name);
+#endif
+#endif
+
+KVM_HYP_CPU_FTR_REG(SYS_CTR_EL0, arm64_ftr_reg_ctrel0)
diff --git a/arch/arm64/include/asm/kvm_host.h 
b/arch/arm64/include/asm/kvm_host.h
index 06ca4828005f..459ee557f87c 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -751,9 +751,13 @@ void kvm_clr_pmu_events(u32 clr);
 
 void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu);
 void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu);
+
+void setup_kvm_el2_caps(void);
 #else
 static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {}
 static inline void kvm_clr_pmu_events(u32 clr) {}
+
+static inline void setup_kvm_el2_caps(void) {}
 #endif
 
 void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 066030717a4c..f2d8b479ff74 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1154,6 +1154,18 @@ u64 read_sanitised_ftr_reg(u32 id)
 }
 EXPORT_SYMBOL_GPL(read_sanitised_ftr_reg);
 
+int copy_ftr_reg(u32 id, struct arm64_ftr_reg *dst)
+{
+   struct arm64_ftr_reg *regp = get_arm64_ftr_reg(id);
+
+   if (!regp)
+   return -EINVAL;
+
+   memcpy(dst, regp, sizeof(*regp));
+
+   return 0;
+}
+
 #define read_sysreg_case(r)\
case r: val = read_sysreg_s(r); break;
 
@@ -2773,6 +2785,7 @@ void __init setup_cpu_features(void)
 
setup_system_capabilities();
setup_elf_hwcaps(arm64_elf_hwcaps);
+   setup_kvm_el2_caps();
 
if (system_supports_32bit_el0())
setup_elf_hwcaps(compat_elf_hwcaps);
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index 6894a917f290..0033591553fc 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -13,7 +13,8 @@ lib-objs := clear_page.o copy_page.o memcpy.o memset.o
 lib-objs := $(addprefix ../../../lib/, $(lib-objs))
 
 obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
-hyp-main.o hyp-smp.o psci-relay.o early_alloc.o stub.o page_alloc.o
+hyp-main.o hyp-smp.o psci-relay.o early_alloc.o stub.o page_alloc.o \
+cache.o cpufeature.o
 obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
 ../fpsimd.o ../hyp-entry.o ../exception.o
 obj-y += $(lib-objs)
diff --git a/arch/arm64/kvm/hyp/nvhe/cache.S b/arch/arm64/kvm/hyp/nvhe/cache.S
new file mode 100644
index ..36cef6915428
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/cache.S
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Code copied from arch/arm64/mm/cache.S.
+ */
+
+#include 
+#include 
+#include 
+
+SYM_FUNC_START_PI(__f

[PATCH v3 10/32] KVM: arm64: Introduce an early Hyp page allocator

2021-03-02 Thread Quentin Perret
With nVHE, the host currently creates all stage 1 hypervisor mappings at
EL1 during boot, installs them at EL2, and extends them as required
(e.g. when creating a new VM). But in a world where the host is no
longer trusted, it cannot have full control over the code mapped in the
hypervisor.

In preparation for enabling the hypervisor to create its own stage 1
mappings during boot, introduce an early page allocator, with minimal
functionality. This allocator is designed to be used only during early
bootstrap of the hyp code when memory protection is enabled, which will
then switch to using a full-fledged page allocator after init.
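
For illustration, the intended early-boot usage is simply (a sketch;
@virt/@size describe the memory pool donated by the host):

	hyp_early_alloc_init(virt, size);
	pgd  = hyp_early_alloc_contig(nr_pgd_pages);	/* multi-page allocation */
	page = hyp_early_alloc_page(NULL);		/* single zeroed page */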

Signed-off-by: Quentin Perret 
---
 arch/arm64/kvm/hyp/include/nvhe/early_alloc.h | 14 +
 arch/arm64/kvm/hyp/include/nvhe/memory.h  | 24 +
 arch/arm64/kvm/hyp/nvhe/Makefile  |  2 +-
 arch/arm64/kvm/hyp/nvhe/early_alloc.c | 54 +++
 arch/arm64/kvm/hyp/nvhe/psci-relay.c  |  4 +-
 5 files changed, 94 insertions(+), 4 deletions(-)
 create mode 100644 arch/arm64/kvm/hyp/include/nvhe/early_alloc.h
 create mode 100644 arch/arm64/kvm/hyp/include/nvhe/memory.h
 create mode 100644 arch/arm64/kvm/hyp/nvhe/early_alloc.c

diff --git a/arch/arm64/kvm/hyp/include/nvhe/early_alloc.h 
b/arch/arm64/kvm/hyp/include/nvhe/early_alloc.h
new file mode 100644
index ..dc61aaa56f31
--- /dev/null
+++ b/arch/arm64/kvm/hyp/include/nvhe/early_alloc.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __KVM_HYP_EARLY_ALLOC_H
+#define __KVM_HYP_EARLY_ALLOC_H
+
+#include 
+
+void hyp_early_alloc_init(void *virt, unsigned long size);
+unsigned long hyp_early_alloc_nr_used_pages(void);
+void *hyp_early_alloc_page(void *arg);
+void *hyp_early_alloc_contig(unsigned int nr_pages);
+
+extern struct kvm_pgtable_mm_ops hyp_early_alloc_mm_ops;
+
+#endif /* __KVM_HYP_EARLY_ALLOC_H */
diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h 
b/arch/arm64/kvm/hyp/include/nvhe/memory.h
new file mode 100644
index ..3e49eaa7e682
--- /dev/null
+++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __KVM_HYP_MEMORY_H
+#define __KVM_HYP_MEMORY_H
+
+#include 
+
+#include 
+
+extern s64 hyp_physvirt_offset;
+
+#define __hyp_pa(virt) ((phys_addr_t)(virt) + hyp_physvirt_offset)
+#define __hyp_va(phys) ((void *)((phys_addr_t)(phys) - hyp_physvirt_offset))
+
+static inline void *hyp_phys_to_virt(phys_addr_t phys)
+{
+   return __hyp_va(phys);
+}
+
+static inline phys_addr_t hyp_virt_to_phys(void *addr)
+{
+   return __hyp_pa(addr);
+}
+
+#endif /* __KVM_HYP_MEMORY_H */
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index bc98f8e3d1da..24ff99e2eac5 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -13,7 +13,7 @@ lib-objs := clear_page.o copy_page.o memcpy.o memset.o
 lib-objs := $(addprefix ../../../lib/, $(lib-objs))
 
 obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
-hyp-main.o hyp-smp.o psci-relay.o
+hyp-main.o hyp-smp.o psci-relay.o early_alloc.o
 obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
 ../fpsimd.o ../hyp-entry.o ../exception.o
 obj-y += $(lib-objs)
diff --git a/arch/arm64/kvm/hyp/nvhe/early_alloc.c 
b/arch/arm64/kvm/hyp/nvhe/early_alloc.c
new file mode 100644
index ..1306c430ab87
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/early_alloc.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 Google LLC
+ * Author: Quentin Perret 
+ */
+
+#include 
+
+#include 
+#include 
+
+struct kvm_pgtable_mm_ops hyp_early_alloc_mm_ops;
+s64 __ro_after_init hyp_physvirt_offset;
+
+static unsigned long base;
+static unsigned long end;
+static unsigned long cur;
+
+unsigned long hyp_early_alloc_nr_used_pages(void)
+{
+   return (cur - base) >> PAGE_SHIFT;
+}
+
+void *hyp_early_alloc_contig(unsigned int nr_pages)
+{
+   unsigned long size = (nr_pages << PAGE_SHIFT);
+   void *ret = (void *)cur;
+
+   if (!nr_pages)
+   return NULL;
+
+   if (end - cur < size)
+   return NULL;
+
+   cur += size;
+   memset(ret, 0, size);
+
+   return ret;
+}
+
+void *hyp_early_alloc_page(void *arg)
+{
+   return hyp_early_alloc_contig(1);
+}
+
+void hyp_early_alloc_init(void *virt, unsigned long size)
+{
+   base = cur = (unsigned long)virt;
+   end = base + size;
+
+   hyp_early_alloc_mm_ops.zalloc_page = hyp_early_alloc_page;
+   hyp_early_alloc_mm_ops.phys_to_virt = hyp_phys_to_virt;
+   hyp_early_alloc_mm_ops.virt_to_phys = hyp_virt_to_phys;
+}
diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c 
b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
index 63de71c0481e..08508783ec3d 100644
--- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c
+++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
@@ -11,6 +11,7 @@
 #include 
 #include 
 
+#include 
 #incl

[PATCH v3 11/32] KVM: arm64: Stub CONFIG_DEBUG_LIST at Hyp

2021-03-02 Thread Quentin Perret
In order to use the kernel list library at EL2, introduce stubs for the
CONFIG_DEBUG_LIST out-of-line calls.

Signed-off-by: Quentin Perret 
---
 arch/arm64/kvm/hyp/nvhe/Makefile |  2 +-
 arch/arm64/kvm/hyp/nvhe/stub.c   | 22 ++
 2 files changed, 23 insertions(+), 1 deletion(-)
 create mode 100644 arch/arm64/kvm/hyp/nvhe/stub.c

diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index 24ff99e2eac5..144da72ad510 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -13,7 +13,7 @@ lib-objs := clear_page.o copy_page.o memcpy.o memset.o
 lib-objs := $(addprefix ../../../lib/, $(lib-objs))
 
 obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
-hyp-main.o hyp-smp.o psci-relay.o early_alloc.o
+hyp-main.o hyp-smp.o psci-relay.o early_alloc.o stub.o
 obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
 ../fpsimd.o ../hyp-entry.o ../exception.o
 obj-y += $(lib-objs)
diff --git a/arch/arm64/kvm/hyp/nvhe/stub.c b/arch/arm64/kvm/hyp/nvhe/stub.c
new file mode 100644
index ..c0aa6bbfd79d
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/stub.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Stubs for out-of-line function calls caused by re-using kernel
+ * infrastructure at EL2.
+ *
+ * Copyright (C) 2020 - Google LLC
+ */
+
+#include 
+
+#ifdef CONFIG_DEBUG_LIST
+bool __list_add_valid(struct list_head *new, struct list_head *prev,
+ struct list_head *next)
+{
+   return true;
+}
+
+bool __list_del_entry_valid(struct list_head *entry)
+{
+   return true;
+}
+#endif
-- 
2.30.1.766.gb4fecdf3b7-goog



[PATCH v3 08/32] KVM: arm64: Make kvm_call_hyp() a function call at Hyp

2021-03-02 Thread Quentin Perret
kvm_call_hyp() has some logic to issue a function call or a hypercall
depending on the EL at which the kernel is running. However, all the
code compiled under __KVM_NVHE_HYPERVISOR__ is guaranteed to only run
at EL2 which allows us to simplify.

Add ifdefery to kvm_host.h to simplify kvm_call_hyp() in .hyp.text.

Acked-by: Will Deacon 
Signed-off-by: Quentin Perret 
---
 arch/arm64/include/asm/kvm_host.h | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/arch/arm64/include/asm/kvm_host.h 
b/arch/arm64/include/asm/kvm_host.h
index 3d10e6527f7d..06ca4828005f 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -591,6 +591,7 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 void kvm_arm_halt_guest(struct kvm *kvm);
 void kvm_arm_resume_guest(struct kvm *kvm);
 
+#ifndef __KVM_NVHE_HYPERVISOR__
 #define kvm_call_hyp_nvhe(f, ...)  
\
({  \
struct arm_smccc_res res;   \
@@ -630,6 +631,11 @@ void kvm_arm_resume_guest(struct kvm *kvm);
\
ret;\
})
+#else /* __KVM_NVHE_HYPERVISOR__ */
+#define kvm_call_hyp(f, ...) f(__VA_ARGS__)
+#define kvm_call_hyp_ret(f, ...) f(__VA_ARGS__)
+#define kvm_call_hyp_nvhe(f, ...) f(__VA_ARGS__)
+#endif /* __KVM_NVHE_HYPERVISOR__ */
 
 void force_vm_exit(const cpumask_t *mask);
 void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
-- 
2.30.1.766.gb4fecdf3b7-goog



[PATCH v3 16/32] KVM: arm64: Elevate hypervisor mappings creation at EL2

2021-03-02 Thread Quentin Perret
Previous commits have introduced infrastructure to enable the EL2 code
to manage its own stage 1 mappings. However, this was preliminary work,
and none of it is currently in use.

Put all of this together by elevating the mapping creation at EL2 when
memory protection is enabled. In this case, the host kernel running
at EL1 still creates _temporary_ EL2 mappings, only used while
initializing the hypervisor, but frees them right after.

As such, all calls to create_hyp_mappings() after kvm init has finished
turn into hypercalls, as the host now has no 'legal' way to modify the
hypervisor page tables directly.
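
For illustration, the dispatch presumably ends up looking like the sketch
below (the mmu.c hunk is not shown in full; kvm_host_owns_hyp_mappings()
is an assumed helper name, and hyp_pgtable stands for the host-side hyp
page-table handle):

	if (kvm_host_owns_hyp_mappings())
		/* Early boot: build the mapping directly, as before. */
		err = kvm_pgtable_hyp_map(hyp_pgtable, virt, size, phys, prot);
	else
		/* After init in protected mode: ask EL2 to do it. */
		err = kvm_call_hyp_nvhe(__pkvm_create_mappings,
					virt, size, phys, prot);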

Signed-off-by: Quentin Perret 
---
 arch/arm64/include/asm/kvm_mmu.h |  2 +-
 arch/arm64/kvm/arm.c | 87 +---
 arch/arm64/kvm/mmu.c | 43 ++--
 3 files changed, 120 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 5c42ec023cc7..ce02a4052dcf 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -166,7 +166,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu);
 
 phys_addr_t kvm_mmu_get_httbr(void);
 phys_addr_t kvm_get_idmap_vector(void);
-int kvm_mmu_init(void);
+int kvm_mmu_init(u32 *hyp_va_bits);
 
 static inline void *__kvm_vector_slot2addr(void *base,
   enum arm64_hyp_spectre_vector slot)
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 26e573cdede3..5a97abdc3ba6 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1421,7 +1421,7 @@ static void cpu_prepare_hyp_mode(int cpu)
kvm_flush_dcache_to_poc(params, sizeof(*params));
 }
 
-static void cpu_init_hyp_mode(void)
+static void hyp_install_host_vector(void)
 {
struct kvm_nvhe_init_params *params;
struct arm_smccc_res res;
@@ -1439,6 +1439,11 @@ static void cpu_init_hyp_mode(void)
params = this_cpu_ptr_nvhe_sym(kvm_init_params);
arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__kvm_hyp_init), 
virt_to_phys(params), &res);
WARN_ON(res.a0 != SMCCC_RET_SUCCESS);
+}
+
+static void cpu_init_hyp_mode(void)
+{
+   hyp_install_host_vector();
 
/*
 * Disabling SSBD on a non-VHE system requires us to enable SSBS
@@ -1481,7 +1486,10 @@ static void cpu_set_hyp_vector(void)
struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data);
void *vector = hyp_spectre_vector_selector[data->slot];
 
-   *this_cpu_ptr_hyp_sym(kvm_hyp_vector) = (unsigned long)vector;
+   if (!is_protected_kvm_enabled())
+   *this_cpu_ptr_hyp_sym(kvm_hyp_vector) = (unsigned long)vector;
+   else
+   kvm_call_hyp_nvhe(__pkvm_cpu_set_vector, data->slot);
 }
 
 static void cpu_hyp_reinit(void)
@@ -1489,13 +1497,14 @@ static void cpu_hyp_reinit(void)

kvm_init_host_cpu_context(&this_cpu_ptr_hyp_sym(kvm_host_data)->host_ctxt);
 
cpu_hyp_reset();
-   cpu_set_hyp_vector();
 
if (is_kernel_in_hyp_mode())
kvm_timer_init_vhe();
else
cpu_init_hyp_mode();
 
+   cpu_set_hyp_vector();
+
kvm_arm_init_debug();
 
if (vgic_present)
@@ -1691,18 +1700,59 @@ static void teardown_hyp_mode(void)
}
 }
 
+static int do_pkvm_init(u32 hyp_va_bits)
+{
+   void *per_cpu_base = kvm_ksym_ref(kvm_arm_hyp_percpu_base);
+   int ret;
+
+   preempt_disable();
+   hyp_install_host_vector();
+   ret = kvm_call_hyp_nvhe(__pkvm_init, hyp_mem_base, hyp_mem_size,
+   num_possible_cpus(), kern_hyp_va(per_cpu_base),
+   hyp_va_bits);
+   preempt_enable();
+
+   return ret;
+}
+
+static int kvm_hyp_init_protection(u32 hyp_va_bits)
+{
+   void *addr = phys_to_virt(hyp_mem_base);
+   int ret;
+
+   ret = create_hyp_mappings(addr, addr + hyp_mem_size, PAGE_HYP);
+   if (ret)
+   return ret;
+
+   ret = do_pkvm_init(hyp_va_bits);
+   if (ret)
+   return ret;
+
+   free_hyp_pgds();
+
+   return 0;
+}
+
 /**
  * Inits Hyp-mode on all online CPUs
  */
 static int init_hyp_mode(void)
 {
+   u32 hyp_va_bits;
int cpu;
-   int err = 0;
+   int err = -ENOMEM;
+
+   /*
+* The protected Hyp-mode cannot be initialized if the memory pool
+* allocation has failed.
+*/
+   if (is_protected_kvm_enabled() && !hyp_mem_base)
+   return err;
 
/*
 * Allocate Hyp PGD and setup Hyp identity mapping
 */
-   err = kvm_mmu_init();
+   err = kvm_mmu_init(&hyp_va_bits);
if (err)
goto out_err;
 
@@ -1818,6 +1868,14 @@ static int init_hyp_mode(void)
goto out_err;
}
 
+   if (is_protected_kvm_enabled()) {
+   err = kvm_hyp_init_protection(hyp_va_bits);
+   if (err) {
+   kvm_err("Failed to init h

[PATCH v3 22/32] KVM: arm64: Refactor __populate_fault_info()

2021-03-02 Thread Quentin Perret
Refactor __populate_fault_info() to introduce __get_fault_info() which
will be used once the host is wrapped in a stage 2.

Signed-off-by: Quentin Perret 
---
 arch/arm64/kvm/hyp/include/hyp/switch.h | 37 ++---
 1 file changed, 21 insertions(+), 16 deletions(-)

diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h 
b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 6c1f51f25eb3..1f017c9851bb 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -160,19 +160,9 @@ static inline bool __translate_far_to_hpfar(u64 far, u64 
*hpfar)
return true;
 }
 
-static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
+static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault)
 {
-   u8 ec;
-   u64 esr;
-   u64 hpfar, far;
-
-   esr = vcpu->arch.fault.esr_el2;
-   ec = ESR_ELx_EC(esr);
-
-   if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW)
-   return true;
-
-   far = read_sysreg_el2(SYS_FAR);
+   fault->far_el2 = read_sysreg_el2(SYS_FAR);
 
/*
 * The HPFAR can be invalid if the stage 2 fault did not
@@ -188,14 +178,29 @@ static inline bool __populate_fault_info(struct kvm_vcpu 
*vcpu)
if (!(esr & ESR_ELx_S1PTW) &&
(cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
 (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
-   if (!__translate_far_to_hpfar(far, &hpfar))
+   if (!__translate_far_to_hpfar(fault->far_el2, 
&fault->hpfar_el2))
return false;
} else {
-   hpfar = read_sysreg(hpfar_el2);
+   fault->hpfar_el2 = read_sysreg(hpfar_el2);
}
 
-   vcpu->arch.fault.far_el2 = far;
-   vcpu->arch.fault.hpfar_el2 = hpfar;
+   return true;
+}
+
+static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
+{
+   u8 ec;
+   u64 esr;
+
+   esr = vcpu->arch.fault.esr_el2;
+   ec = ESR_ELx_EC(esr);
+
+   if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW)
+   return true;
+
+   if (!__get_fault_info(esr, &vcpu->arch.fault))
+   return false;
+
return true;
 }
 
-- 
2.30.1.766.gb4fecdf3b7-goog



[PATCH v3 15/32] KVM: arm64: Prepare the creation of s1 mappings at EL2

2021-03-02 Thread Quentin Perret
When memory protection is enabled, the EL2 code needs the ability to
create and manage its own page-table. To do so, introduce a new set of
hypercalls to bootstrap a memory management system at EL2.

This leads to the following boot flow in nVHE Protected mode:

 1. the host allocates memory for the hypervisor very early on, using
the memblock API;

 2. the host creates a set of stage 1 page-table for EL2, installs the
EL2 vectors, and issues the __pkvm_init hypercall;

 3. during __pkvm_init, the hypervisor re-creates its stage 1 page-table
and stores it in the memory pool provided by the host;

 4. the hypervisor then extends its stage 1 mappings to include a
vmemmap in the EL2 VA space, hence allowing it to use the buddy
allocator introduced in a previous patch;

 5. the hypervisor jumps back in the idmap page, switches from the
host-provided page-table to the new one, and wraps up its
initialization by enabling the new allocator, before returning to
the host.

 6. the host can free the now unused page-table created for EL2, and
will now need to issue hypercalls to make changes to the EL2 stage 1
mappings instead of modifying them directly.

Note that for the sake of simplifying the review, this patch focuses on
the hypervisor side of things. In other words, this only implements the
new hypercalls, but does not make use of them from the host yet. The
host-side changes will follow in a subsequent patch.

Credits to Will for __pkvm_init_switch_pgd.
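
As a rough, stand-alone illustration of step 4 above (the numbers and the
per-page metadata size are assumptions, not taken from the patch), the EL2
vmemmap is sized purely from the amount of memory donated to the hypervisor:

#include <stdio.h>

#define PAGE_SIZE       4096UL
#define HYP_PAGE_META   32UL    /* assumed per-page metadata size */

int main(void)
{
        unsigned long donated = 64UL << 20;     /* e.g. 64 MiB given to EL2 */
        unsigned long pages = donated / PAGE_SIZE;
        unsigned long vmemmap = pages * HYP_PAGE_META;

        printf("%lu pool pages -> %lu bytes of hyp vmemmap (%lu pages)\n",
               pages, vmemmap, (vmemmap + PAGE_SIZE - 1) / PAGE_SIZE);
        return 0;
}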

Co-authored-by: Will Deacon 
Signed-off-by: Will Deacon 
Signed-off-by: Quentin Perret 
---
 arch/arm64/include/asm/kvm_asm.h |   4 +
 arch/arm64/include/asm/kvm_host.h|   7 +
 arch/arm64/include/asm/kvm_hyp.h |   8 ++
 arch/arm64/include/asm/kvm_pgtable.h |   2 +
 arch/arm64/kernel/image-vars.h   |  16 +++
 arch/arm64/kvm/hyp/Makefile  |   2 +-
 arch/arm64/kvm/hyp/include/nvhe/mm.h |  71 ++
 arch/arm64/kvm/hyp/nvhe/Makefile |   4 +-
 arch/arm64/kvm/hyp/nvhe/hyp-init.S   |  31 +
 arch/arm64/kvm/hyp/nvhe/hyp-main.c   |  49 +++
 arch/arm64/kvm/hyp/nvhe/mm.c | 173 
 arch/arm64/kvm/hyp/nvhe/setup.c  | 195 +++
 arch/arm64/kvm/hyp/pgtable.c |   2 -
 arch/arm64/kvm/hyp/reserved_mem.c|  92 +
 arch/arm64/mm/init.c |   3 +
 15 files changed, 654 insertions(+), 5 deletions(-)
 create mode 100644 arch/arm64/kvm/hyp/include/nvhe/mm.h
 create mode 100644 arch/arm64/kvm/hyp/nvhe/mm.c
 create mode 100644 arch/arm64/kvm/hyp/nvhe/setup.c
 create mode 100644 arch/arm64/kvm/hyp/reserved_mem.c

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 22d933e9b59e..db20a9477870 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -57,6 +57,10 @@
 #define __KVM_HOST_SMCCC_FUNC___kvm_get_mdcr_el2   12
 #define __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs  13
 #define __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs   14
+#define __KVM_HOST_SMCCC_FUNC___pkvm_init  15
+#define __KVM_HOST_SMCCC_FUNC___pkvm_create_mappings   16
+#define __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping17
+#define __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector18
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm64/include/asm/kvm_host.h 
b/arch/arm64/include/asm/kvm_host.h
index 459ee557f87c..b9d45a1f8538 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -781,5 +781,12 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
(test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features))
 
 int kvm_trng_call(struct kvm_vcpu *vcpu);
+#ifdef CONFIG_KVM
+extern phys_addr_t hyp_mem_base;
+extern phys_addr_t hyp_mem_size;
+void __init kvm_hyp_reserve(void);
+#else
+static inline void kvm_hyp_reserve(void) { }
+#endif
 
 #endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index c0450828378b..ae55351b99a4 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -100,4 +100,12 @@ void __noreturn hyp_panic(void);
 void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par);
 #endif
 
+#ifdef __KVM_NVHE_HYPERVISOR__
+void __pkvm_init_switch_pgd(phys_addr_t phys, unsigned long size,
+   phys_addr_t pgd, void *sp, void *cont_fn);
+int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus,
+   unsigned long *per_cpu_base, u32 hyp_va_bits);
+void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt);
+#endif
+
 #endif /* __ARM64_KVM_HYP_H__ */
diff --git a/arch/arm64/include/asm/kvm_pgtable.h 
b/arch/arm64/include/asm/kvm_pgtable.h
index 3c306f90f7da..7898610fbeeb 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -11,6 +11,8 @@
 #include 
 #include 
 
+#define K

[PATCH v3 03/32] arm64: kvm: Add standalone ticket spinlock implementation for use at hyp

2021-03-02 Thread Quentin Perret
From: Will Deacon 

We will soon need to synchronise multiple CPUs in the hyp text at EL2.
The qspinlock-based locking used by the host is overkill for this purpose
and relies on the kernel's "percpu" implementation for the MCS nodes.

Implement a simple ticket locking scheme based heavily on the code removed
by commit c11090474d70 ("arm64: locking: Replace ticket lock implementation
with qspinlock").

Signed-off-by: Will Deacon 
Signed-off-by: Quentin Perret 
---
 arch/arm64/kvm/hyp/include/nvhe/spinlock.h | 92 ++
 1 file changed, 92 insertions(+)
 create mode 100644 arch/arm64/kvm/hyp/include/nvhe/spinlock.h

diff --git a/arch/arm64/kvm/hyp/include/nvhe/spinlock.h 
b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h
new file mode 100644
index ..76b537f8d1c6
--- /dev/null
+++ b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * A stand-alone ticket spinlock implementation for use by the non-VHE
+ * KVM hypervisor code running at EL2.
+ *
+ * Copyright (C) 2020 Google LLC
+ * Author: Will Deacon 
+ *
+ * Heavily based on the implementation removed by c11090474d70 which was:
+ * Copyright (C) 2012 ARM Ltd.
+ */
+
+#ifndef __ARM64_KVM_NVHE_SPINLOCK_H__
+#define __ARM64_KVM_NVHE_SPINLOCK_H__
+
+#include 
+#include 
+
+typedef union hyp_spinlock {
+   u32 __val;
+   struct {
+#ifdef __AARCH64EB__
+   u16 next, owner;
+#else
+   u16 owner, next;
+#endif
+   };
+} hyp_spinlock_t;
+
+#define hyp_spin_lock_init(l)  \
+do {   \
+   *(l) = (hyp_spinlock_t){ .__val = 0 };  \
+} while (0)
+
+static inline void hyp_spin_lock(hyp_spinlock_t *lock)
+{
+   u32 tmp;
+   hyp_spinlock_t lockval, newval;
+
+   asm volatile(
+   /* Atomically increment the next ticket. */
+   ARM64_LSE_ATOMIC_INSN(
+   /* LL/SC */
+"  prfmpstl1strm, %3\n"
+"1:ldaxr   %w0, %3\n"
+"  add %w1, %w0, #(1 << 16)\n"
+"  stxr%w2, %w1, %3\n"
+"  cbnz%w2, 1b\n",
+   /* LSE atomics */
+"  mov %w2, #(1 << 16)\n"
+"  ldadda  %w2, %w0, %3\n"
+   __nops(3))
+
+   /* Did we get the lock? */
+"  eor %w1, %w0, %w0, ror #16\n"
+"  cbz %w1, 3f\n"
+   /*
+* No: spin on the owner. Send a local event to avoid missing an
+* unlock before the exclusive load.
+*/
+"  sevl\n"
+"2:wfe\n"
+"  ldaxrh  %w2, %4\n"
+"  eor %w1, %w2, %w0, lsr #16\n"
+"  cbnz%w1, 2b\n"
+   /* We got the lock. Critical section starts here. */
+"3:"
+   : "=&r" (lockval), "=&r" (newval), "=&r" (tmp), "+Q" (*lock)
+   : "Q" (lock->owner)
+   : "memory");
+}
+
+static inline void hyp_spin_unlock(hyp_spinlock_t *lock)
+{
+   u64 tmp;
+
+   asm volatile(
+   ARM64_LSE_ATOMIC_INSN(
+   /* LL/SC */
+   "   ldrh%w1, %0\n"
+   "   add %w1, %w1, #1\n"
+   "   stlrh   %w1, %0",
+   /* LSE atomics */
+   "   mov %w1, #1\n"
+   "   staddlh %w1, %0\n"
+   __nops(1))
+   : "=Q" (lock->owner), "=&r" (tmp)
+   :
+   : "memory");
+}
+
+#endif /* __ARM64_KVM_NVHE_SPINLOCK_H__ */
-- 
2.30.1.766.gb4fecdf3b7-goog



[PATCH v3 02/32] KVM: arm64: Link position-independent string routines into .hyp.text

2021-03-02 Thread Quentin Perret
From: Will Deacon 

Pull clear_page(), copy_page(), memcpy() and memset() into the nVHE hyp
code and ensure that we always execute the '__pi_' entry point on the
off-chance that it changes in the future.

[ qperret: Commit title nits and added linker script alias ]

Signed-off-by: Will Deacon 
Signed-off-by: Quentin Perret 
---
 arch/arm64/include/asm/hyp_image.h |  3 +++
 arch/arm64/kernel/image-vars.h | 11 +++
 arch/arm64/kvm/hyp/nvhe/Makefile   |  4 
 3 files changed, 18 insertions(+)

diff --git a/arch/arm64/include/asm/hyp_image.h 
b/arch/arm64/include/asm/hyp_image.h
index 737ded6b6d0d..78cd77990c9c 100644
--- a/arch/arm64/include/asm/hyp_image.h
+++ b/arch/arm64/include/asm/hyp_image.h
@@ -56,6 +56,9 @@
  */
 #define KVM_NVHE_ALIAS(sym)kvm_nvhe_sym(sym) = sym;
 
+/* Defines a linker script alias for KVM nVHE hyp symbols */
+#define KVM_NVHE_ALIAS_HYP(first, sec) kvm_nvhe_sym(first) = kvm_nvhe_sym(sec);
+
 #endif /* LINKER_SCRIPT */
 
 #endif /* __ARM64_HYP_IMAGE_H__ */
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index 5aa9ed1e9ec6..4eb7a15c8b60 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -104,6 +104,17 @@ KVM_NVHE_ALIAS(kvm_arm_hyp_percpu_base);
 /* PMU available static key */
 KVM_NVHE_ALIAS(kvm_arm_pmu_available);
 
+/* Position-independent library routines */
+KVM_NVHE_ALIAS_HYP(clear_page, __pi_clear_page);
+KVM_NVHE_ALIAS_HYP(copy_page, __pi_copy_page);
+KVM_NVHE_ALIAS_HYP(memcpy, __pi_memcpy);
+KVM_NVHE_ALIAS_HYP(memset, __pi_memset);
+
+#ifdef CONFIG_KASAN
+KVM_NVHE_ALIAS_HYP(__memcpy, __pi_memcpy);
+KVM_NVHE_ALIAS_HYP(__memset, __pi_memset);
+#endif
+
 #endif /* CONFIG_KVM */
 
 #endif /* __ARM64_KERNEL_IMAGE_VARS_H */
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index a6707df4f6c0..bc98f8e3d1da 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -9,10 +9,14 @@ ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS
 hostprogs := gen-hyprel
 HOST_EXTRACFLAGS += -I$(objtree)/include
 
+lib-objs := clear_page.o copy_page.o memcpy.o memset.o
+lib-objs := $(addprefix ../../../lib/, $(lib-objs))
+
 obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
 hyp-main.o hyp-smp.o psci-relay.o
 obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
 ../fpsimd.o ../hyp-entry.o ../exception.o
+obj-y += $(lib-objs)
 
 ##
 ## Build rules for compiling nVHE hyp code
-- 
2.30.1.766.gb4fecdf3b7-goog



[PATCH v3 14/32] KVM: arm64: Factor out vector address calculation

2021-03-02 Thread Quentin Perret
In order to re-map the guest vectors at EL2 when pKVM is enabled,
refactor __kvm_vector_slot2idx() and kvm_init_vector_slot() to move all
the address calculation logic into a static inline function.
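
To see what the helper computes, the sketch below reproduces the arithmetic
with made-up slot names; only the "base + idx * SZ_2K" layout, with the first
two slot values collapsing onto index 0, is the point:

#include <stdio.h>

#define SLOT_SIZE       2048    /* SZ_2K: each vector slot spans 2KiB */

/* Illustrative slot names, not the kernel's arm64_hyp_spectre_vector enum. */
enum vector_slot { SLOT_DIRECT, SLOT_1, SLOT_2, SLOT_3 };

static unsigned long slot_addr(unsigned long base, enum vector_slot slot)
{
        int idx = slot - (slot != SLOT_DIRECT);

        return base + idx * SLOT_SIZE;
}

int main(void)
{
        for (int s = SLOT_DIRECT; s <= SLOT_3; s++)
                printf("slot %d -> base + 0x%lx\n", s,
                       slot_addr(0, (enum vector_slot)s));
        return 0;
}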

Acked-by: Will Deacon 
Signed-off-by: Quentin Perret 
---
 arch/arm64/include/asm/kvm_mmu.h | 8 
 arch/arm64/kvm/arm.c | 9 +
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 90873851f677..5c42ec023cc7 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -168,6 +168,14 @@ phys_addr_t kvm_mmu_get_httbr(void);
 phys_addr_t kvm_get_idmap_vector(void);
 int kvm_mmu_init(void);
 
+static inline void *__kvm_vector_slot2addr(void *base,
+  enum arm64_hyp_spectre_vector slot)
+{
+   int idx = slot - (slot != HYP_VECTOR_DIRECT);
+
+   return base + (idx * SZ_2K);
+}
+
 struct kvm;
 
 #define kvm_flush_dcache_to_poc(a,l)   __flush_dcache_area((a), (l))
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 3f8bcf8db036..26e573cdede3 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1345,16 +1345,9 @@ static unsigned long nvhe_percpu_order(void)
 /* A lookup table holding the hypervisor VA for each vector slot */
 static void *hyp_spectre_vector_selector[BP_HARDEN_EL2_SLOTS];
 
-static int __kvm_vector_slot2idx(enum arm64_hyp_spectre_vector slot)
-{
-   return slot - (slot != HYP_VECTOR_DIRECT);
-}
-
 static void kvm_init_vector_slot(void *base, enum arm64_hyp_spectre_vector 
slot)
 {
-   int idx = __kvm_vector_slot2idx(slot);
-
-   hyp_spectre_vector_selector[slot] = base + (idx * SZ_2K);
+   hyp_spectre_vector_selector[slot] = __kvm_vector_slot2addr(base, slot);
 }
 
 static int kvm_init_vector_slots(void)
-- 
2.30.1.766.gb4fecdf3b7-goog



[PATCH v3 06/32] KVM: arm64: Factor memory allocation out of pgtable.c

2021-03-02 Thread Quentin Perret
In preparation for enabling the creation of page-tables at EL2, factor
all memory allocation out of the page-table code, hence making it
re-usable with any compatible memory allocator.

No functional changes intended.
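
To make the intent concrete, here is a small stand-alone (user-space) model
of the pattern: the page-table code only ever goes through a table of
callbacks, so each environment can plug in its own allocator. The structure
and names are illustrative, not the kernel's kvm_pgtable_mm_ops.

#include <stdlib.h>

struct mm_ops {
        void *(*zalloc_page)(void *arg);
        void  (*free_page)(void *addr);
};

/* A trivial "host-side" implementation backed by the C library. */
static void *host_zalloc_page(void *arg)
{
        (void)arg;
        return calloc(1, 4096);
}

static void host_free_page(void *addr)
{
        free(addr);
}

static const struct mm_ops host_mm_ops = {
        .zalloc_page = host_zalloc_page,
        .free_page   = host_free_page,
};

/* Generic table code never calls an allocator directly... */
static void *alloc_table(const struct mm_ops *ops, void *arg)
{
        return ops->zalloc_page(arg);
}

int main(void)
{
        /* ...so the same code could later be linked against an EL2 pool. */
        void *pgd = alloc_table(&host_mm_ops, NULL);

        host_mm_ops.free_page(pgd);
        return 0;
}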

Signed-off-by: Quentin Perret 
---
 arch/arm64/include/asm/kvm_pgtable.h | 41 +++-
 arch/arm64/kvm/hyp/pgtable.c | 98 +---
 arch/arm64/kvm/mmu.c | 66 ++-
 3 files changed, 163 insertions(+), 42 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_pgtable.h 
b/arch/arm64/include/asm/kvm_pgtable.h
index 8886d43cfb11..3c306f90f7da 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -13,17 +13,50 @@
 
 typedef u64 kvm_pte_t;
 
+/**
+ * struct kvm_pgtable_mm_ops - Memory management callbacks.
+ * @zalloc_page:   Allocate a single zeroed memory page. The @arg parameter
+ * can be used by the walker to pass a memcache. The
+ * initial refcount of the page is 1.
+ * @zalloc_pages_exact:Allocate an exact number of zeroed memory 
pages. The
+ * @size parameter is in bytes, it is automatically rounded
+ * to PAGE_SIZE and the resulting allocation is physically
+ * contiguous.
+ * @free_pages_exact:  Free an exact number of memory pages, to free memory
+ * allocated with zalloc_pages_exact.
+ * @get_page:  Increment the refcount on a page.
+ * @put_page:  Decrement the refcount on a page. When the refcount
+ * reaches 0 the page is automatically freed.
+ * @page_count:Return the refcount of a page.
+ * @phys_to_virt:  Convert a physical address into a virtual address as
+ * accessible in the current context.
+ * @virt_to_phys:  Convert a virtual address in the current context into a
+ * physical address.
+ */
+struct kvm_pgtable_mm_ops {
+   void*   (*zalloc_page)(void *arg);
+   void*   (*zalloc_pages_exact)(size_t size);
+   void(*free_pages_exact)(void *addr, size_t size);
+   void(*get_page)(void *addr);
+   void(*put_page)(void *addr);
+   int (*page_count)(void *addr);
+   void*   (*phys_to_virt)(phys_addr_t phys);
+   phys_addr_t (*virt_to_phys)(void *addr);
+};
+
 /**
  * struct kvm_pgtable - KVM page-table.
  * @ia_bits:   Maximum input address size, in bits.
  * @start_level:   Level at which the page-table walk starts.
  * @pgd:   Pointer to the first top-level entry of the page-table.
+ * @mm_ops:Memory management callbacks.
  * @mmu:   Stage-2 KVM MMU struct. Unused for stage-1 page-tables.
  */
 struct kvm_pgtable {
u32 ia_bits;
u32 start_level;
kvm_pte_t   *pgd;
+   struct kvm_pgtable_mm_ops   *mm_ops;
 
/* Stage-2 only */
struct kvm_s2_mmu   *mmu;
@@ -86,10 +119,12 @@ struct kvm_pgtable_walker {
  * kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table.
  * @pgt:   Uninitialised page-table structure to initialise.
  * @va_bits:   Maximum virtual address bits.
+ * @mm_ops:Memory management callbacks.
  *
  * Return: 0 on success, negative error code on failure.
  */
-int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits);
+int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
+struct kvm_pgtable_mm_ops *mm_ops);
 
 /**
  * kvm_pgtable_hyp_destroy() - Destroy an unused hypervisor stage-1 page-table.
@@ -126,10 +161,12 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 
addr, u64 size, u64 phys,
  * kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table.
  * @pgt:   Uninitialised page-table structure to initialise.
  * @kvm:   KVM structure representing the guest virtual machine.
+ * @mm_ops:Memory management callbacks.
  *
  * Return: 0 on success, negative error code on failure.
  */
-int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm);
+int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm,
+   struct kvm_pgtable_mm_ops *mm_ops);
 
 /**
  * kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table.
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 81fe032f34d1..b975a67d1f85 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -152,9 +152,9 @@ static kvm_pte_t kvm_phys_to_pte(u64 pa)
return pte;
 }
 
-static kvm_pte_t *kvm_pte_follow(kvm_pte_t pte)
+static kvm_pte_t *kvm_pte_follow(kvm_pte_t pte, struct kvm_pgtable_mm_ops 
*mm_ops)
 {
-   return __va(kvm_pte_to_phys(pte));
+   return mm_op

[PATCH v3 17/32] KVM: arm64: Use kvm_arch for stage 2 pgtable

2021-03-02 Thread Quentin Perret
In order to make use of the stage 2 pgtable code for the host stage 2,
use struct kvm_arch in lieu of struct kvm as the host will have the
former but not the latter.

Acked-by: Will Deacon 
Signed-off-by: Quentin Perret 
---
 arch/arm64/include/asm/kvm_pgtable.h | 5 +++--
 arch/arm64/kvm/hyp/pgtable.c | 6 +++---
 arch/arm64/kvm/mmu.c | 2 +-
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_pgtable.h 
b/arch/arm64/include/asm/kvm_pgtable.h
index 7898610fbeeb..a8255d55c168 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -162,12 +162,13 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 
addr, u64 size, u64 phys,
 /**
  * kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table.
  * @pgt:   Uninitialised page-table structure to initialise.
- * @kvm:   KVM structure representing the guest virtual machine.
+ * @arch:  Arch-specific KVM structure representing the guest virtual
+ * machine.
  * @mm_ops:Memory management callbacks.
  *
  * Return: 0 on success, negative error code on failure.
  */
-int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm,
+int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
struct kvm_pgtable_mm_ops *mm_ops);
 
 /**
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 7ce0969203e8..3d79c8094cdd 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -879,11 +879,11 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 
addr, u64 size)
return kvm_pgtable_walk(pgt, addr, size, &walker);
 }
 
-int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm,
+int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
struct kvm_pgtable_mm_ops *mm_ops)
 {
size_t pgd_sz;
-   u64 vtcr = kvm->arch.vtcr;
+   u64 vtcr = arch->vtcr;
u32 ia_bits = VTCR_EL2_IPA(vtcr);
u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
@@ -896,7 +896,7 @@ int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct 
kvm *kvm,
pgt->ia_bits= ia_bits;
pgt->start_level= start_level;
pgt->mm_ops = mm_ops;
-   pgt->mmu= &kvm->arch.mmu;
+   pgt->mmu= &arch->mmu;
 
/* Ensure zeroed PGD pages are visible to the hardware walker */
dsb(ishst);
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 9d331bf262d2..41f9c03cbcc3 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -457,7 +457,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu 
*mmu)
if (!pgt)
return -ENOMEM;
 
-   err = kvm_pgtable_stage2_init(pgt, kvm, &kvm_s2_mm_ops);
+   err = kvm_pgtable_stage2_init(pgt, &kvm->arch, &kvm_s2_mm_ops);
if (err)
goto out_free_pgtable;
 
-- 
2.30.1.766.gb4fecdf3b7-goog



[PATCH v3 12/32] KVM: arm64: Introduce a Hyp buddy page allocator

2021-03-02 Thread Quentin Perret
When memory protection is enabled, the hyp code will require a basic
form of memory management in order to allocate and free memory pages at
EL2. This is needed for various use-cases, including the creation of hyp
mappings or the allocation of stage 2 page tables.

To address these use-cases, introduce a simple memory allocator in the
hyp code. The allocator is designed as a conventional 'buddy allocator',
working at page granularity. It allows allocating and freeing
physically contiguous pages from memory 'pools', with a guaranteed order
alignment in the PA space. Each page in a memory pool is associated
with a struct hyp_page which holds the page's metadata, including its
refcount, as well as its current order, hence mimicking the kernel's
buddy system in the GFP infrastructure. The hyp_page metadata are made
accessible through a hyp_vmemmap, following the concept of
SPARSE_VMEMMAP in the kernel.
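
As a side note, the 'buddy' relationship and the order alignment guarantee
boil down to simple pfn arithmetic; the sketch below is illustrative only
and not code from the patch:

#include <stdio.h>

/* The buddy of a 2^order-page block starting at 'pfn' is found by flipping
 * bit 'order' of the pfn; merging the two yields a naturally aligned block
 * of the next order.
 */
static unsigned long buddy_pfn(unsigned long pfn, unsigned int order)
{
        return pfn ^ (1UL << order);
}

int main(void)
{
        unsigned long pfn = 0x1234;
        unsigned int order = 2;

        printf("buddy of pfn %#lx at order %u: %#lx\n",
               pfn, order, buddy_pfn(pfn, order));
        printf("merged order-%u block starts at pfn %#lx\n",
               order + 1, pfn & ~((1UL << (order + 1)) - 1));
        return 0;
}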

Signed-off-by: Quentin Perret 
---
 arch/arm64/kvm/hyp/include/nvhe/gfp.h|  55 +++
 arch/arm64/kvm/hyp/include/nvhe/memory.h |  28 
 arch/arm64/kvm/hyp/nvhe/Makefile |   2 +-
 arch/arm64/kvm/hyp/nvhe/page_alloc.c | 195 +++
 4 files changed, 279 insertions(+), 1 deletion(-)
 create mode 100644 arch/arm64/kvm/hyp/include/nvhe/gfp.h
 create mode 100644 arch/arm64/kvm/hyp/nvhe/page_alloc.c

diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h 
b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
new file mode 100644
index ..d039086d86b5
--- /dev/null
+++ b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __KVM_HYP_GFP_H
+#define __KVM_HYP_GFP_H
+
+#include 
+
+#include 
+#include 
+
+#define HYP_NO_ORDER   UINT_MAX
+
+struct hyp_pool {
+   /*
+* Spinlock protecting concurrent changes to the memory pool as well as
+* the struct hyp_page of the pool's pages until we have a proper atomic
+* API at EL2.
+*/
+   hyp_spinlock_t lock;
+   struct list_head free_area[MAX_ORDER];
+   phys_addr_t range_start;
+   phys_addr_t range_end;
+   unsigned int max_order;
+};
+
+static inline void hyp_page_ref_inc(struct hyp_page *p)
+{
+   struct hyp_pool *pool = hyp_page_to_pool(p);
+
+   hyp_spin_lock(&pool->lock);
+   p->refcount++;
+   hyp_spin_unlock(&pool->lock);
+}
+
+static inline int hyp_page_ref_dec_and_test(struct hyp_page *p)
+{
+   struct hyp_pool *pool = hyp_page_to_pool(p);
+   int ret;
+
+   hyp_spin_lock(&pool->lock);
+   p->refcount--;
+   ret = (p->refcount == 0);
+   hyp_spin_unlock(&pool->lock);
+
+   return ret;
+}
+
+/* Allocation */
+void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order);
+void hyp_get_page(void *addr);
+void hyp_put_page(void *addr);
+
+/* Used pages cannot be freed */
+int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages,
+ unsigned int reserved_pages);
+#endif /* __KVM_HYP_GFP_H */
diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h 
b/arch/arm64/kvm/hyp/include/nvhe/memory.h
index 3e49eaa7e682..d2fb307c5952 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/memory.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h
@@ -6,7 +6,17 @@
 
 #include 
 
+struct hyp_pool;
+struct hyp_page {
+   unsigned int refcount;
+   unsigned int order;
+   struct hyp_pool *pool;
+   struct list_head node;
+};
+
 extern s64 hyp_physvirt_offset;
+extern u64 __hyp_vmemmap;
+#define hyp_vmemmap ((struct hyp_page *)__hyp_vmemmap)
 
 #define __hyp_pa(virt) ((phys_addr_t)(virt) + hyp_physvirt_offset)
 #define __hyp_va(phys) ((void *)((phys_addr_t)(phys) - hyp_physvirt_offset))
@@ -21,4 +31,22 @@ static inline phys_addr_t hyp_virt_to_phys(void *addr)
return __hyp_pa(addr);
 }
 
+#define hyp_phys_to_pfn(phys)  ((phys) >> PAGE_SHIFT)
+#define hyp_pfn_to_phys(pfn)   ((phys_addr_t)((pfn) << PAGE_SHIFT))
+#define hyp_phys_to_page(phys) (&hyp_vmemmap[hyp_phys_to_pfn(phys)])
+#define hyp_virt_to_page(virt) hyp_phys_to_page(__hyp_pa(virt))
+#define hyp_virt_to_pfn(virt)  hyp_phys_to_pfn(__hyp_pa(virt))
+
+#define hyp_page_to_pfn(page)  ((struct hyp_page *)(page) - hyp_vmemmap)
+#define hyp_page_to_phys(page)  hyp_pfn_to_phys((hyp_page_to_pfn(page)))
+#define hyp_page_to_virt(page) __hyp_va(hyp_page_to_phys(page))
+#define hyp_page_to_pool(page) (((struct hyp_page *)page)->pool)
+
+static inline int hyp_page_count(void *addr)
+{
+   struct hyp_page *p = hyp_virt_to_page(addr);
+
+   return p->refcount;
+}
+
 #endif /* __KVM_HYP_MEMORY_H */
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index 144da72ad510..6894a917f290 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -13,7 +13,7 @@ lib-objs := clear_page.o copy_page.o memcpy.o memset.o
 lib-objs := $(addprefix ../../../lib/, $(lib-objs))
 
 obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
-hyp-main.o h

[PATCH v3 05/32] KVM: arm64: Avoid free_page() in page-table allocator

2021-03-02 Thread Quentin Perret
Currently, the KVM page-table allocator uses a mix of put_page() and
free_page() calls depending on the context even though page-allocation
is always achieved using variants of __get_free_page().

Make the code consistent by using put_page() throughout, and reduce the
memory management API surface used by the page-table code. This will
ease factoring out page-allocation from pgtable.c, which is a
pre-requisite to creating page-tables at EL2.
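
A toy user-space model of the convention this relies on (not kernel code):
pages are born with a refcount of 1, so the final put is always what frees
them, and no separate free_page() call is ever required.

#include <stdlib.h>

struct tpage { int refcount; void *mem; };

static struct tpage *page_alloc(void)
{
        struct tpage *p = malloc(sizeof(*p));

        p->refcount = 1;                /* initial reference */
        p->mem = calloc(1, 4096);
        return p;
}

static void ref_get(struct tpage *p) { p->refcount++; }

static void ref_put(struct tpage *p)
{
        if (--p->refcount == 0) {
                free(p->mem);
                free(p);
        }
}

int main(void)
{
        struct tpage *p = page_alloc();

        ref_get(p);     /* e.g. a table entry now points at this page */
        ref_put(p);     /* the entry goes away...                     */
        ref_put(p);     /* ...and the last put frees the page         */
        return 0;
}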

Acked-by: Will Deacon 
Signed-off-by: Quentin Perret 
---
 arch/arm64/kvm/hyp/pgtable.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 4d177ce1d536..81fe032f34d1 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -413,7 +413,7 @@ int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 
va_bits)
 static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
   enum kvm_pgtable_walk_flags flag, void * const arg)
 {
-   free_page((unsigned long)kvm_pte_follow(*ptep));
+   put_page(virt_to_page(kvm_pte_follow(*ptep)));
return 0;
 }
 
@@ -425,7 +425,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
};
 
WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
-   free_page((unsigned long)pgt->pgd);
+   put_page(virt_to_page(pgt->pgd));
pgt->pgd = NULL;
 }
 
@@ -577,7 +577,7 @@ static int stage2_map_walk_table_post(u64 addr, u64 end, 
u32 level,
if (!data->anchor)
return 0;
 
-   free_page((unsigned long)kvm_pte_follow(*ptep));
+   put_page(virt_to_page(kvm_pte_follow(*ptep)));
put_page(virt_to_page(ptep));
 
if (data->anchor == ptep) {
@@ -700,7 +700,7 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 
level, kvm_pte_t *ptep,
}
 
if (childp)
-   free_page((unsigned long)childp);
+   put_page(virt_to_page(childp));
 
return 0;
 }
@@ -897,7 +897,7 @@ static int stage2_free_walker(u64 addr, u64 end, u32 level, 
kvm_pte_t *ptep,
put_page(virt_to_page(ptep));
 
if (kvm_pte_table(pte, level))
-   free_page((unsigned long)kvm_pte_follow(pte));
+   put_page(virt_to_page(kvm_pte_follow(pte)));
 
return 0;
 }
-- 
2.30.1.766.gb4fecdf3b7-goog



[PATCH v3 21/32] KVM: arm64: Refactor __load_guest_stage2()

2021-03-02 Thread Quentin Perret
Refactor __load_guest_stage2() to introduce __load_stage2() which will
be re-used when loading the host stage 2.

Acked-by: Will Deacon 
Signed-off-by: Quentin Perret 
---
 arch/arm64/include/asm/kvm_mmu.h | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 6f743e20cb06..9d64fa73ee67 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -270,9 +270,9 @@ static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu 
*mmu)
  * Must be called from hyp code running at EL2 with an updated VTTBR
  * and interrupts disabled.
  */
-static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu)
+static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu, unsigned 
long vtcr)
 {
-   write_sysreg(kern_hyp_va(mmu->arch)->vtcr, vtcr_el2);
+   write_sysreg(vtcr, vtcr_el2);
write_sysreg(kvm_get_vttbr(mmu), vttbr_el2);
 
/*
@@ -283,6 +283,11 @@ static __always_inline void __load_guest_stage2(struct 
kvm_s2_mmu *mmu)
asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
 }
 
+static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu)
+{
+   __load_stage2(mmu, kern_hyp_va(mmu->arch)->vtcr);
+}
+
 static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu)
 {
return container_of(mmu->arch, struct kvm, arch);
-- 
2.30.1.766.gb4fecdf3b7-goog



[PATCH v3 00/32] KVM: arm64: A stage 2 for the host

2021-03-02 Thread Quentin Perret
Hi all,

This is the v3 of the series previously posted here:

  https://lore.kernel.org/kvmarm/20201117181607.1761516-1-qper...@google.com/

This basically allows us to wrap the host with a stage 2 when running in
nVHE, hence paving the way for protecting guest memory from the host in
the future (among other use-cases). For more details about the
motivation and the design angle taken here, I would recommend to have a
look at the cover letter of v1, and/or to watch these presentations at
LPC [1] and KVM forum 2020 [2].

V3 includes a bunch of clean-ups and small refactorings all over the
place as well as a few new features. Specifically, this now allows us to
remove memory pages from the host stage 2 cleanly, and this series does
so for all the .hyp memory sections (which has uncovered existing bugs
upstream and in v2 of this series -- see [3] and [4]). This also now
makes good use of block mappings whenever that is possible, and has
gotten a bit more testing on real hardware (which helped uncover other
bugs [5]).

The other changes to v3 include:

 - clean-ups, refactoring and extra comments all over the place (Will);

 - dropped fdt hook in favor of memblock API now that the relevant
   patches ([6]) are merged (Rob);

 - moved the CPU feature copy stuff to __init/__initdata (Marc);

 - fixed FWB support (Mate);

 - rebased on v5.12-rc1.

This series depends on Will's vCPU context fix ([5]) and Marc's PMU
fixes ([7]). And here's a branch with all the goodies applied:

  https://android-kvm.googlesource.com/linux qperret/host-stage2-v3

Thanks,
Quentin

[1] https://youtu.be/54q6RzS9BpQ?t=10859
[2] https://youtu.be/wY-u6n75iXc
[3] https://lore.kernel.org/kvmarm/20210203141931.615898-1-qper...@google.com/
[4] https://lore.kernel.org/kvmarm/20210128173850.2478161-1-qper...@google.com/
[5] https://lore.kernel.org/kvmarm/20210226181211.14542-1-w...@kernel.org/
[6] https://lore.kernel.org/lkml/20210115114544.1830068-1-qper...@google.com/
[7] 
https://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git/log/?h=kvm-arm64/pmu-undef-NV


Quentin Perret (29):
  KVM: arm64: Initialize kvm_nvhe_init_params early
  KVM: arm64: Avoid free_page() in page-table allocator
  KVM: arm64: Factor memory allocation out of pgtable.c
  KVM: arm64: Introduce a BSS section for use at Hyp
  KVM: arm64: Make kvm_call_hyp() a function call at Hyp
  KVM: arm64: Allow using kvm_nvhe_sym() in hyp code
  KVM: arm64: Introduce an early Hyp page allocator
  KVM: arm64: Stub CONFIG_DEBUG_LIST at Hyp
  KVM: arm64: Introduce a Hyp buddy page allocator
  KVM: arm64: Enable access to sanitized CPU features at EL2
  KVM: arm64: Factor out vector address calculation
  KVM: arm64: Prepare the creation of s1 mappings at EL2
  KVM: arm64: Elevate hypervisor mappings creation at EL2
  KVM: arm64: Use kvm_arch for stage 2 pgtable
  KVM: arm64: Use kvm_arch in kvm_s2_mmu
  KVM: arm64: Set host stage 2 using kvm_nvhe_init_params
  KVM: arm64: Refactor kvm_arm_setup_stage2()
  KVM: arm64: Refactor __load_guest_stage2()
  KVM: arm64: Refactor __populate_fault_info()
  KVM: arm64: Make memcache anonymous in pgtable allocator
  KVM: arm64: Reserve memory for host stage 2
  KVM: arm64: Sort the hypervisor memblocks
  KVM: arm64: Introduce PROT_NONE mappings for stage 2
  KVM: arm64: Refactor stage2_map_set_prot_attr()
  KVM: arm64: Add kvm_pgtable_stage2_idmap_greedy()
  KVM: arm64: Wrap the host with a stage 2
  KVM: arm64: Page-align the .hyp sections
  KVM: arm64: Disable PMU support in protected mode
  KVM: arm64: Protect the .hyp sections from the host

Will Deacon (3):
  arm64: lib: Annotate {clear,copy}_page() as position-independent
  KVM: arm64: Link position-independent string routines into .hyp.text
  arm64: kvm: Add standalone ticket spinlock implementation for use at
hyp

 arch/arm64/include/asm/cpufeature.h   |   1 +
 arch/arm64/include/asm/hyp_image.h|   7 +
 arch/arm64/include/asm/kvm_asm.h  |   9 +
 arch/arm64/include/asm/kvm_cpufeature.h   |  19 ++
 arch/arm64/include/asm/kvm_host.h |  19 +-
 arch/arm64/include/asm/kvm_hyp.h  |   8 +
 arch/arm64/include/asm/kvm_mmu.h  |  23 +-
 arch/arm64/include/asm/kvm_pgtable.h  | 117 ++-
 arch/arm64/include/asm/sections.h |   1 +
 arch/arm64/kernel/asm-offsets.c   |   3 +
 arch/arm64/kernel/cpufeature.c|  13 +
 arch/arm64/kernel/image-vars.h|  30 ++
 arch/arm64/kernel/vmlinux.lds.S   |  74 +++--
 arch/arm64/kvm/arm.c  | 199 ++--
 arch/arm64/kvm/hyp/Makefile   |   2 +-
 arch/arm64/kvm/hyp/include/hyp/switch.h   |  37 ++-
 arch/arm64/kvm/hyp/include/nvhe/early_alloc.h |  14 +
 arch/arm64/kvm/hyp/include/nvhe/gfp.h |  55 
 arch/arm64/kvm/hyp/include/nvhe/mem_protect.h |  36 +++
 arch/arm64/kvm/hyp/include/nvhe/memory.h  |  52 +++
 arch/arm64/kvm/hyp/include/nvhe/mm.h  | 

[PATCH v3 04/32] KVM: arm64: Initialize kvm_nvhe_init_params early

2021-03-02 Thread Quentin Perret
Move the initialization of kvm_nvhe_init_params into a dedicated function
that is run early, and only once during KVM init, rather than every time
the KVM vectors are set and reset.

This also opens the opportunity for the hypervisor to change the init
structs during boot, hence simplifying the replacement of the host-provided
page-table with the one the hypervisor will create for itself.

Signed-off-by: Quentin Perret 
---
 arch/arm64/kvm/arm.c | 30 ++
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index fc4c95dd2d26..2d1e7ef69c04 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1383,22 +1383,18 @@ static int kvm_init_vector_slots(void)
return 0;
 }
 
-static void cpu_init_hyp_mode(void)
+static void cpu_prepare_hyp_mode(int cpu)
 {
-   struct kvm_nvhe_init_params *params = 
this_cpu_ptr_nvhe_sym(kvm_init_params);
-   struct arm_smccc_res res;
+   struct kvm_nvhe_init_params *params = 
per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
unsigned long tcr;
 
-   /* Switch from the HYP stub to our own HYP init vector */
-   __hyp_set_vectors(kvm_get_idmap_vector());
-
/*
 * Calculate the raw per-cpu offset without a translation from the
 * kernel's mapping to the linear mapping, and store it in tpidr_el2
 * so that we can use adr_l to access per-cpu variables in EL2.
 * Also drop the KASAN tag which gets in the way...
 */
-   params->tpidr_el2 = (unsigned 
long)kasan_reset_tag(this_cpu_ptr_nvhe_sym(__per_cpu_start)) -
+   params->tpidr_el2 = (unsigned 
long)kasan_reset_tag(per_cpu_ptr_nvhe_sym(__per_cpu_start, cpu)) -
(unsigned 
long)kvm_ksym_ref(CHOOSE_NVHE_SYM(__per_cpu_start));
 
params->mair_el2 = read_sysreg(mair_el1);
@@ -1422,7 +1418,7 @@ static void cpu_init_hyp_mode(void)
tcr |= (idmap_t0sz & GENMASK(TCR_TxSZ_WIDTH - 1, 0)) << TCR_T0SZ_OFFSET;
params->tcr_el2 = tcr;
 
-   params->stack_hyp_va = 
kern_hyp_va(__this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE);
+   params->stack_hyp_va = kern_hyp_va(per_cpu(kvm_arm_hyp_stack_page, cpu) 
+ PAGE_SIZE);
params->pgd_pa = kvm_mmu_get_httbr();
 
/*
@@ -1430,6 +1426,15 @@ static void cpu_init_hyp_mode(void)
 * be read while the MMU is off.
 */
kvm_flush_dcache_to_poc(params, sizeof(*params));
+}
+
+static void cpu_init_hyp_mode(void)
+{
+   struct kvm_nvhe_init_params *params;
+   struct arm_smccc_res res;
+
+   /* Switch from the HYP stub to our own HYP init vector */
+   __hyp_set_vectors(kvm_get_idmap_vector());
 
/*
 * Call initialization code, and switch to the full blown HYP code.
@@ -1438,6 +1443,7 @@ static void cpu_init_hyp_mode(void)
 * cpus_have_const_cap() wrapper.
 */
BUG_ON(!system_capabilities_finalized());
+   params = this_cpu_ptr_nvhe_sym(kvm_init_params);
arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__kvm_hyp_init), 
virt_to_phys(params), &res);
WARN_ON(res.a0 != SMCCC_RET_SUCCESS);
 
@@ -1785,19 +1791,19 @@ static int init_hyp_mode(void)
}
}
 
-   /*
-* Map Hyp percpu pages
-*/
for_each_possible_cpu(cpu) {
char *percpu_begin = (char *)kvm_arm_hyp_percpu_base[cpu];
char *percpu_end = percpu_begin + nvhe_percpu_size();
 
+   /* Map Hyp percpu pages */
err = create_hyp_mappings(percpu_begin, percpu_end, PAGE_HYP);
-
if (err) {
kvm_err("Cannot map hyp percpu region\n");
goto out_err;
}
+
+   /* Prepare the CPU initialization parameters */
+   cpu_prepare_hyp_mode(cpu);
}
 
if (is_protected_kvm_enabled()) {
-- 
2.30.1.766.gb4fecdf3b7-goog



[PATCH v3 09/32] KVM: arm64: Allow using kvm_nvhe_sym() in hyp code

2021-03-02 Thread Quentin Perret
In order to allow code shared by the host and the hyp to live in static
inline library functions, allow the usage of kvm_nvhe_sym() at EL2 by
defaulting to the raw symbol name.
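
A stripped-down illustration of why the fallback helps (an assumed example,
not code from the patch): with the #ifdef below, a helper shared between the
kernel and the hyp object can name a hyp symbol once and get the prefixed
name in the kernel build and the raw name in the EL2 build.

#ifndef __KVM_NVHE_HYPERVISOR__
#define kvm_nvhe_sym(sym)       __kvm_nvhe_##sym
#else
#define kvm_nvhe_sym(sym)       sym
#endif

/* Hypothetical shared symbol, for illustration only. */
extern unsigned long kvm_nvhe_sym(hyp_example_counter);

/* Compiles in both worlds: resolves to __kvm_nvhe_hyp_example_counter in
 * the kernel object and to hyp_example_counter in the hyp object.
 */
static inline unsigned long get_hyp_example_counter(void)
{
        return kvm_nvhe_sym(hyp_example_counter);
}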

Acked-by: Will Deacon 
Signed-off-by: Quentin Perret 
---
 arch/arm64/include/asm/hyp_image.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/arch/arm64/include/asm/hyp_image.h 
b/arch/arm64/include/asm/hyp_image.h
index 78cd77990c9c..b4b3076a76fb 100644
--- a/arch/arm64/include/asm/hyp_image.h
+++ b/arch/arm64/include/asm/hyp_image.h
@@ -10,11 +10,15 @@
 #define __HYP_CONCAT(a, b) a ## b
 #define HYP_CONCAT(a, b)   __HYP_CONCAT(a, b)
 
+#ifndef __KVM_NVHE_HYPERVISOR__
 /*
  * KVM nVHE code has its own symbol namespace prefixed with __kvm_nvhe_,
  * to separate it from the kernel proper.
  */
 #define kvm_nvhe_sym(sym)  __kvm_nvhe_##sym
+#else
+#define kvm_nvhe_sym(sym)  sym
+#endif
 
 #ifdef LINKER_SCRIPT
 
-- 
2.30.1.766.gb4fecdf3b7-goog



[PATCH v3 01/32] arm64: lib: Annotate {clear, copy}_page() as position-independent

2021-03-02 Thread Quentin Perret
From: Will Deacon 

clear_page() and copy_page() are suitable for use outside of the kernel
address space, so annotate them as position-independent code.

Signed-off-by: Will Deacon 
Signed-off-by: Quentin Perret 
---
 arch/arm64/lib/clear_page.S | 4 ++--
 arch/arm64/lib/copy_page.S  | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/lib/clear_page.S b/arch/arm64/lib/clear_page.S
index 073acbf02a7c..b84b179edba3 100644
--- a/arch/arm64/lib/clear_page.S
+++ b/arch/arm64/lib/clear_page.S
@@ -14,7 +14,7 @@
  * Parameters:
  * x0 - dest
  */
-SYM_FUNC_START(clear_page)
+SYM_FUNC_START_PI(clear_page)
mrs x1, dczid_el0
and w1, w1, #0xf
mov x2, #4
@@ -25,5 +25,5 @@ SYM_FUNC_START(clear_page)
tst x0, #(PAGE_SIZE - 1)
b.ne1b
ret
-SYM_FUNC_END(clear_page)
+SYM_FUNC_END_PI(clear_page)
 EXPORT_SYMBOL(clear_page)
diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S
index e7a793961408..29144f4cd449 100644
--- a/arch/arm64/lib/copy_page.S
+++ b/arch/arm64/lib/copy_page.S
@@ -17,7 +17,7 @@
  * x0 - dest
  * x1 - src
  */
-SYM_FUNC_START(copy_page)
+SYM_FUNC_START_PI(copy_page)
 alternative_if ARM64_HAS_NO_HW_PREFETCH
// Prefetch three cache lines ahead.
prfmpldl1strm, [x1, #128]
@@ -75,5 +75,5 @@ alternative_else_nop_endif
stnpx16, x17, [x0, #112 - 256]
 
ret
-SYM_FUNC_END(copy_page)
+SYM_FUNC_END_PI(copy_page)
 EXPORT_SYMBOL(copy_page)
-- 
2.30.1.766.gb4fecdf3b7-goog



[PATCH v3 19/32] KVM: arm64: Set host stage 2 using kvm_nvhe_init_params

2021-03-02 Thread Quentin Perret
Move the registers relevant to host stage 2 enablement to
kvm_nvhe_init_params to prepare the ground for enabling it in later
patches.
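
The asm-offsets.c hunk follows the usual pattern: offsetof() values computed
by the C compiler are exported as assembler constants, so the early init code
can do e.g. "ldr x1, [x0, #NVHE_INIT_HCR_EL2]" without hard-coding the
structure layout. A stand-alone reminder of the mechanism, using a reduced,
hypothetical struct rather than the real kvm_nvhe_init_params:

#include <stddef.h>
#include <stdio.h>

struct init_params {
        unsigned long tpidr_el2;
        unsigned long stack_hyp_va;
        unsigned long hcr_el2;
        unsigned long vttbr;
        unsigned long vtcr;
};

int main(void)
{
        /* The real build emits these as #defines consumed by .S files. */
        printf("NVHE_INIT_HCR_EL2 == %zu\n",
               offsetof(struct init_params, hcr_el2));
        printf("NVHE_INIT_VTTBR   == %zu\n",
               offsetof(struct init_params, vttbr));
        printf("NVHE_INIT_VTCR    == %zu\n",
               offsetof(struct init_params, vtcr));
        return 0;
}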

Acked-by: Will Deacon 
Signed-off-by: Quentin Perret 
---
 arch/arm64/include/asm/kvm_asm.h   |  3 +++
 arch/arm64/kernel/asm-offsets.c|  3 +++
 arch/arm64/kvm/arm.c   |  5 +
 arch/arm64/kvm/hyp/nvhe/hyp-init.S | 14 +-
 arch/arm64/kvm/hyp/nvhe/switch.c   |  5 +
 5 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index db20a9477870..6dce860f8bca 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -158,6 +158,9 @@ struct kvm_nvhe_init_params {
unsigned long tpidr_el2;
unsigned long stack_hyp_va;
phys_addr_t pgd_pa;
+   unsigned long hcr_el2;
+   unsigned long vttbr;
+   unsigned long vtcr;
 };
 
 /* Translate a kernel address @ptr into its equivalent linear mapping */
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index a36e2fc330d4..8930b42f6418 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -120,6 +120,9 @@ int main(void)
   DEFINE(NVHE_INIT_TPIDR_EL2,  offsetof(struct kvm_nvhe_init_params, 
tpidr_el2));
   DEFINE(NVHE_INIT_STACK_HYP_VA,   offsetof(struct kvm_nvhe_init_params, 
stack_hyp_va));
   DEFINE(NVHE_INIT_PGD_PA, offsetof(struct kvm_nvhe_init_params, pgd_pa));
+  DEFINE(NVHE_INIT_HCR_EL2,offsetof(struct kvm_nvhe_init_params, hcr_el2));
+  DEFINE(NVHE_INIT_VTTBR,  offsetof(struct kvm_nvhe_init_params, vttbr));
+  DEFINE(NVHE_INIT_VTCR,   offsetof(struct kvm_nvhe_init_params, vtcr));
 #endif
 #ifdef CONFIG_CPU_PM
   DEFINE(CPU_CTX_SP,   offsetof(struct cpu_suspend_ctx, sp));
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 5a97abdc3ba6..b6a818f88051 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1413,6 +1413,11 @@ static void cpu_prepare_hyp_mode(int cpu)
 
params->stack_hyp_va = kern_hyp_va(per_cpu(kvm_arm_hyp_stack_page, cpu) 
+ PAGE_SIZE);
params->pgd_pa = kvm_mmu_get_httbr();
+   if (is_protected_kvm_enabled())
+   params->hcr_el2 = HCR_HOST_NVHE_PROTECTED_FLAGS;
+   else
+   params->hcr_el2 = HCR_HOST_NVHE_FLAGS;
+   params->vttbr = params->vtcr = 0;
 
/*
 * Flush the init params from the data cache because the struct will
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S 
b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
index bc56ea92b812..f312672d895e 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
@@ -83,11 +83,6 @@ SYM_CODE_END(__kvm_hyp_init)
  * x0: struct kvm_nvhe_init_params PA
  */
 SYM_CODE_START_LOCAL(___kvm_hyp_init)
-alternative_if ARM64_KVM_PROTECTED_MODE
-   mov_q   x1, HCR_HOST_NVHE_PROTECTED_FLAGS
-   msr hcr_el2, x1
-alternative_else_nop_endif
-
ldr x1, [x0, #NVHE_INIT_TPIDR_EL2]
msr tpidr_el2, x1
 
@@ -97,6 +92,15 @@ alternative_else_nop_endif
ldr x1, [x0, #NVHE_INIT_MAIR_EL2]
msr mair_el2, x1
 
+   ldr x1, [x0, #NVHE_INIT_HCR_EL2]
+   msr hcr_el2, x1
+
+   ldr x1, [x0, #NVHE_INIT_VTTBR]
+   msr vttbr_el2, x1
+
+   ldr x1, [x0, #NVHE_INIT_VTCR]
+   msr vtcr_el2, x1
+
ldr x1, [x0, #NVHE_INIT_PGD_PA]
phys_to_ttbr x2, x1
 alternative_if ARM64_HAS_CNP
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index f3d0e9eca56c..979a76cdf9fb 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -97,10 +97,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
 
write_sysreg(mdcr_el2, mdcr_el2);
-   if (is_protected_kvm_enabled())
-   write_sysreg(HCR_HOST_NVHE_PROTECTED_FLAGS, hcr_el2);
-   else
-   write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2);
+   write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2);
write_sysreg(CPTR_EL2_DEFAULT, cptr_el2);
write_sysreg(__kvm_hyp_host_vector, vbar_el2);
 }
-- 
2.30.1.766.gb4fecdf3b7-goog



[PATCH v3 07/32] KVM: arm64: Introduce a BSS section for use at Hyp

2021-03-02 Thread Quentin Perret
Currently, the hyp code cannot make full use of a bss, as the kernel
section is mapped read-only.

While this mapping could simply be changed to read-write, it would
intermingle the hyp and kernel state even more than they currently are.
Instead, introduce a __hyp_bss section that uses reserved pages, and
create the appropriate RW hyp mappings during KVM init.

Signed-off-by: Quentin Perret 
---
 arch/arm64/include/asm/sections.h |  1 +
 arch/arm64/kernel/vmlinux.lds.S   | 52 ---
 arch/arm64/kvm/arm.c  | 14 -
 arch/arm64/kvm/hyp/nvhe/hyp.lds.S |  1 +
 4 files changed, 49 insertions(+), 19 deletions(-)

diff --git a/arch/arm64/include/asm/sections.h 
b/arch/arm64/include/asm/sections.h
index 2f36b16a5b5d..e4ad9db53af1 100644
--- a/arch/arm64/include/asm/sections.h
+++ b/arch/arm64/include/asm/sections.h
@@ -13,6 +13,7 @@ extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
 extern char __hyp_text_start[], __hyp_text_end[];
 extern char __hyp_rodata_start[], __hyp_rodata_end[];
 extern char __hyp_reloc_begin[], __hyp_reloc_end[];
+extern char __hyp_bss_start[], __hyp_bss_end[];
 extern char __idmap_text_start[], __idmap_text_end[];
 extern char __initdata_begin[], __initdata_end[];
 extern char __inittext_begin[], __inittext_end[];
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 7eea7888bb02..e96173ce211b 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -5,24 +5,7 @@
  * Written by Martin Mares 
  */
 
-#define RO_EXCEPTION_TABLE_ALIGN   8
-#define RUNTIME_DISCARD_EXIT
-
-#include 
-#include 
 #include 
-#include 
-#include 
-#include 
-
-#include "image.h"
-
-OUTPUT_ARCH(aarch64)
-ENTRY(_text)
-
-jiffies = jiffies_64;
-
-
 #ifdef CONFIG_KVM
 #define HYPERVISOR_EXTABLE \
. = ALIGN(SZ_8);\
@@ -51,13 +34,43 @@ jiffies = jiffies_64;
__hyp_reloc_end = .;\
}
 
+#define BSS_FIRST_SECTIONS \
+   __hyp_bss_start = .;\
+   *(HYP_SECTION_NAME(.bss))   \
+   . = ALIGN(PAGE_SIZE);   \
+   __hyp_bss_end = .;
+
+/*
+ * We require that __hyp_bss_start and __bss_start are aligned, and enforce it
+ * with an assertion. But the BSS_SECTION macro places an empty .sbss section
+ * between them, which can in some cases cause the linker to misalign them. To
+ * work around the issue, force a page alignment for __bss_start.
+ */
+#define SBSS_ALIGN PAGE_SIZE
 #else /* CONFIG_KVM */
 #define HYPERVISOR_EXTABLE
 #define HYPERVISOR_DATA_SECTIONS
 #define HYPERVISOR_PERCPU_SECTION
 #define HYPERVISOR_RELOC_SECTION
+#define SBSS_ALIGN 0
 #endif
 
+#define RO_EXCEPTION_TABLE_ALIGN   8
+#define RUNTIME_DISCARD_EXIT
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "image.h"
+
+OUTPUT_ARCH(aarch64)
+ENTRY(_text)
+
+jiffies = jiffies_64;
+
 #define HYPERVISOR_TEXT\
/*  \
 * Align to 4 KB so that\
@@ -276,7 +289,7 @@ SECTIONS
__pecoff_data_rawsize = ABSOLUTE(. - __initdata_begin);
_edata = .;
 
-   BSS_SECTION(0, 0, 0)
+   BSS_SECTION(SBSS_ALIGN, 0, 0)
 
. = ALIGN(PAGE_SIZE);
init_pg_dir = .;
@@ -324,6 +337,9 @@ ASSERT(__hibernate_exit_text_end - 
(__hibernate_exit_text_start & ~(SZ_4K - 1))
 ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE,
"Entry trampoline text too big")
 #endif
+#ifdef CONFIG_KVM
+ASSERT(__hyp_bss_start == __bss_start, "HYP and Host BSS are misaligned")
+#endif
 /*
  * If padding is applied before .head.text, virt<->phys conversions will fail.
  */
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 2d1e7ef69c04..3f8bcf8db036 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1770,7 +1770,19 @@ static int init_hyp_mode(void)
goto out_err;
}
 
-   err = create_hyp_mappings(kvm_ksym_ref(__bss_start),
+   /*
+* .hyp.bss is guaranteed to be placed at the beginning of the .bss
+* section thanks to an assertion in the linker script. Map it RW and
+* the rest of .bss RO.
+*/
+   err = create_hyp_mappings(kvm_ksym_ref(__hyp_bss_start),
+ kvm_ksym_ref(__hyp_bss_end), PAGE_HYP);
+   if (err) {
+   kvm_err("Cannot map hyp bss section: %d\n", err);
+   goto out_err;
+   }
+
+   err = create_hyp_mappings(kvm_ksym_ref(__hyp_bss_end),
  kvm_ksym_ref(__bss_stop), PAGE_HYP_RO);
if (err) {
kvm_err("Cannot map bss section\n");
diff --git a/arch/arm64/kvm/hyp/nv

Re: [PATCH] kvm: arm64: nvhe: Save the SPE context early

2021-03-02 Thread Marc Zyngier
On Tue, 2 Mar 2021 12:03:45 +, Suzuki K Poulose wrote:
> The nVHE KVM hyp drains and disables the SPE buffer, before
> entering the guest, as the EL1&0 translation regime
> is going to be loaded with that of the guest.
> 
> But this operation is performed way too late, because :
>   - The owning translation regime of the SPE buffer
> is transferred to EL2. (MDCR_EL2_E2PB == 0)
>   - The guest Stage1 is loaded.
> 
> [...]

Applied to kvmarm-master/fixes, thanks!

[1/1] kvm: arm64: nvhe: Save the SPE context early
  commit: cfe1e2b6949785e90e84918295f2be1b6fd152b6

Cheers,

M.
-- 
Without deviation from the norm, progress is not possible.




Re: [PATCH] KVM: arm64: Fix nVHE hyp panic host context restore

2021-03-02 Thread Marc Zyngier
On Fri, 19 Feb 2021 12:24:06 +, Andrew Scull wrote:
> When panicking from the nVHE hyp and restoring the host context, x29 is
> expected to hold a pointer to the host context. This wasn't being done
> so fix it to make sure there's a valid pointer the host context being
> used.
> 
> Rather than passing a boolean indicating whether or not the host context
> should be restored, instead pass the pointer to the host context. NULL
> is passed to indicate that no context should be restored.

Applied to kvmarm-master/fixes, thanks!

[1/1] KVM: arm64: Fix nVHE hyp panic host context restore
  commit: ff9b922a28554de61b9dc36578db77d6720c652a

Cheers,

M.
-- 
Without deviation from the norm, progress is not possible.




Re: [PATCH v2 0/2] KVM: arm64: Prevent spurious PMU accesses when no

2021-03-02 Thread Marc Zyngier
On Tue, 9 Feb 2021 11:48:42 +, Marc Zyngier wrote:
> Yet another PMU bug that is only likely to hit under Nested Virt: we
> unconditionally access PMU registers without checking whether it
> actually is present.
> 
> Given that we already have a predicate for this, promote it to a
> static key, and use that in the world switch.
> 
> [...]

Applied to kvmarm-master/fixes, thanks!

[1/2] KVM: arm64: Turn kvm_arm_support_pmu_v3() into a static key
  commit: 502e5f9a6985898b5318ebb5978a54c3ebf3dfe1
[2/2] KVM: arm64: Don't access PMSELR_EL0/PMUSERENR_EL0 when no PMU is available
  commit: 7b85a9313e6cad22a66027151b6d54d1ce44543f

Cheers,

M.
-- 
Without deviation from the norm, progress is not possible.




Re: [PATCH] KVM: arm64: Avoid corrupting vCPU context register in guest exit

2021-03-02 Thread Marc Zyngier
On Fri, 26 Feb 2021 18:12:11 +, Will Deacon wrote:
> Commit 7db21530479f ("KVM: arm64: Restore hyp when panicking in guest
> context") tracks the currently running vCPU, clearing the pointer to
> NULL on exit from a guest.
> 
> Unfortunately, the use of 'set_loaded_vcpu' clobbers x1 to point at the
> kvm_hyp_ctxt instead of the vCPU context, causing the subsequent RAS
> code to go off into the weeds when it saves the DISR assuming that the
> CPU context is embedded in a struct vCPU.
> 
> [...]

Applied to kvmarm-master/fixes, thanks!

[1/1] KVM: arm64: Avoid corrupting vCPU context register in guest exit
  commit: a8a0f5dbcdf57d89bb8d555c6423763d99a156c1

Cheers,

M.
-- 
Without deviation from the norm, progress is not possible.




[PATCH v3 1/1] kvm: arm64: Add SVE support for nVHE.

2021-03-02 Thread Daniel Kiss
CPUs that support SVE are architecturally required to support the
Virtualization Host Extensions (VHE), so until now the kernel only
supported SVE alongside KVM with VHE enabled. In some cases it is
desirable to run an nVHE configuration even when VHE is available.
This patch adds support for SVE in the nVHE configuration too.

Tested on FVP with a Linux guest VM that run with a different VL than
the host system.

Signed-off-by: Daniel Kiss 
---
 arch/arm64/Kconfig  |  7 -
 arch/arm64/include/asm/el2_setup.h  |  2 +-
 arch/arm64/include/asm/fpsimd.h |  6 
 arch/arm64/include/asm/fpsimdmacros.h   | 24 ++--
 arch/arm64/include/asm/kvm_arm.h|  6 
 arch/arm64/include/asm/kvm_host.h   | 17 +++
 arch/arm64/kernel/entry-fpsimd.S|  5 
 arch/arm64/kvm/arm.c|  5 
 arch/arm64/kvm/fpsimd.c | 38 -
 arch/arm64/kvm/hyp/fpsimd.S | 15 ++
 arch/arm64/kvm/hyp/include/hyp/switch.h | 34 +++---
 arch/arm64/kvm/hyp/nvhe/switch.c| 24 
 arch/arm64/kvm/reset.c  |  4 ---
 13 files changed, 127 insertions(+), 60 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index f39568b28ec1..049428f1bf27 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1676,7 +1676,6 @@ endmenu
 config ARM64_SVE
bool "ARM Scalable Vector Extension support"
default y
-   depends on !KVM || ARM64_VHE
help
  The Scalable Vector Extension (SVE) is an extension to the AArch64
  execution state which complements and extends the SIMD functionality
@@ -1705,12 +1704,6 @@ config ARM64_SVE
  booting the kernel.  If unsure and you are not observing these
  symptoms, you should assume that it is safe to say Y.
 
- CPUs that support SVE are architecturally required to support the
- Virtualization Host Extensions (VHE), so the kernel makes no
- provision for supporting SVE alongside KVM without VHE enabled.
- Thus, you will need to enable CONFIG_ARM64_VHE if you want to support
- KVM in the same kernel image.
-
 config ARM64_MODULE_PLTS
bool "Use PLTs to allow module memory to spill over into vmalloc area"
depends on MODULES
diff --git a/arch/arm64/include/asm/el2_setup.h 
b/arch/arm64/include/asm/el2_setup.h
index a7f5a1bbc8ac..0207393e67c3 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -133,7 +133,7 @@
bic x0, x0, #CPTR_EL2_TZ// Also disable SVE traps
msr cptr_el2, x0// Disable copro. traps to EL2
isb
-   mov x1, #ZCR_ELx_LEN_MASK   // SVE: Enable full vector
+   mov x1, #ZCR_EL2_LEN_HOST   // SVE: Enable full vector
msr_s   SYS_ZCR_EL2, x1 // length for EL1.
 1:
 .endm
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index bec5f14b622a..526d69f3eeb3 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -69,6 +69,12 @@ static inline void *sve_pffr(struct thread_struct *thread)
 extern void sve_save_state(void *state, u32 *pfpsr);
 extern void sve_load_state(void const *state, u32 const *pfpsr,
   unsigned long vq_minus_1);
+/*
+ * sve_load_state_nvhe function for the hyp code where the SVE registers are
+ * handled from the EL2, vector length is governed by ZCR_EL2.
+ */
+extern void sve_load_state_nvhe(void const *state, u32 const *pfpsr,
+  unsigned long vq_minus_1);
 extern void sve_flush_live(void);
 extern void sve_load_from_fpsimd_state(struct user_fpsimd_state const *state,
   unsigned long vq_minus_1);
diff --git a/arch/arm64/include/asm/fpsimdmacros.h 
b/arch/arm64/include/asm/fpsimdmacros.h
index af43367534c7..d309c6071bce 100644
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -205,6 +205,17 @@
 921:
 .endm
 
+/* Update ZCR_EL2.LEN with the new VQ */
+.macro sve_load_vq_nvhe xvqminus1, xtmp, xtmp2
+   mrs_s   \xtmp, SYS_ZCR_EL2
+   bic \xtmp2, \xtmp, ZCR_ELx_LEN_MASK
+   orr \xtmp2, \xtmp2, \xvqminus1
+   cmp \xtmp2, \xtmp
+   b.eq922f
+   msr_s   SYS_ZCR_EL2, \xtmp2 //self-synchronising
+922:
+.endm
+
 /* Preserve the first 128-bits of Znz and zero the rest. */
 .macro _sve_flush_z nz
_sve_check_zreg \nz
@@ -230,8 +241,7 @@
str w\nxtmp, [\xpfpsr, #4]
 .endm
 
-.macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp, xtmp2
-   sve_load_vq \xvqminus1, x\nxtmp, \xtmp2
+.macro _sve_load nxbase, xpfpsr, nxtmp
  _for n, 0, 31,_sve_ldr_v  \n, \nxbase, \n - 34
_sve_ldr_p

[PATCH v3 0/1] kvm: arm64: Add SVE support for nVHE.

2021-03-02 Thread Daniel Kiss
Addressing review comments from the previous version[1].
The discussed optimisation will be a separate patch later.

[1] https://www.spinics.net/lists/arm-kernel/msg874768.html

Changes from v2:
* Comments are addressed.
* rebased to v5.11

Changes from v1:
* Vector length handling is changed.

Daniel Kiss (1):
  kvm: arm64: Add SVE support for nVHE.

 arch/arm64/Kconfig  |  7 -
 arch/arm64/include/asm/el2_setup.h  |  2 +-
 arch/arm64/include/asm/fpsimd.h |  6 
 arch/arm64/include/asm/fpsimdmacros.h   | 24 ++--
 arch/arm64/include/asm/kvm_arm.h|  6 
 arch/arm64/include/asm/kvm_host.h   | 17 +++
 arch/arm64/kernel/entry-fpsimd.S|  5 
 arch/arm64/kvm/arm.c|  5 
 arch/arm64/kvm/fpsimd.c | 38 -
 arch/arm64/kvm/hyp/fpsimd.S | 15 ++
 arch/arm64/kvm/hyp/include/hyp/switch.h | 34 +++---
 arch/arm64/kvm/hyp/nvhe/switch.c| 24 
 arch/arm64/kvm/reset.c  |  4 ---
 13 files changed, 127 insertions(+), 60 deletions(-)

-- 
2.25.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH kvmtool v2 01/22] ioport: Remove ioport__setup_arch()

2021-03-02 Thread Alexandru Elisei
Hi Andre,

On 2/25/21 12:58 AM, Andre Przywara wrote:
> Since x86 had a special need for registering tons of special I/O ports,
> we had an ioport__setup_arch() callback, to allow each architecture
> to do the same. As it turns out no one uses it besides x86, so we remove
> that unnecessary abstraction.
>
> The generic function was registered via a device_base_init() call, so
> we just do the same for the x86 specific function only, and can remove
> the unneeded ioport__setup_arch().

Looks good, I did a compile test for arm64 and x86, and I grepped the kvmtool
directory for ioport__setup_arch. x86 is the only user left:

Reviewed-by: Alexandru Elisei 

Thanks,

Alex

>
> Signed-off-by: Andre Przywara 
> ---
>  arm/ioport.c | 5 -
>  include/kvm/ioport.h | 1 -
>  ioport.c | 6 --
>  mips/kvm.c   | 5 -
>  powerpc/ioport.c | 6 --
>  x86/ioport.c | 3 ++-
>  6 files changed, 2 insertions(+), 24 deletions(-)
>
> diff --git a/arm/ioport.c b/arm/ioport.c
> index 2f0feb9a..24092c9d 100644
> --- a/arm/ioport.c
> +++ b/arm/ioport.c
> @@ -1,11 +1,6 @@
>  #include "kvm/ioport.h"
>  #include "kvm/irq.h"
>  
> -int ioport__setup_arch(struct kvm *kvm)
> -{
> - return 0;
> -}
> -
>  void ioport__map_irq(u8 *irq)
>  {
>   *irq = irq__alloc_line();
> diff --git a/include/kvm/ioport.h b/include/kvm/ioport.h
> index 039633f7..d0213541 100644
> --- a/include/kvm/ioport.h
> +++ b/include/kvm/ioport.h
> @@ -35,7 +35,6 @@ struct ioport_operations {
>   enum irq_type));
>  };
>  
> -int ioport__setup_arch(struct kvm *kvm);
>  void ioport__map_irq(u8 *irq);
>  
>  int __must_check ioport__register(struct kvm *kvm, u16 port, struct 
> ioport_operations *ops,
> diff --git a/ioport.c b/ioport.c
> index 844a832d..a6972179 100644
> --- a/ioport.c
> +++ b/ioport.c
> @@ -221,12 +221,6 @@ out:
>   return !kvm->cfg.ioport_debug;
>  }
>  
> -int ioport__init(struct kvm *kvm)
> -{
> - return ioport__setup_arch(kvm);
> -}
> -dev_base_init(ioport__init);
> -
>  int ioport__exit(struct kvm *kvm)
>  {
>   ioport__unregister_all();
> diff --git a/mips/kvm.c b/mips/kvm.c
> index 26355930..e110e5d5 100644
> --- a/mips/kvm.c
> +++ b/mips/kvm.c
> @@ -100,11 +100,6 @@ void kvm__irq_trigger(struct kvm *kvm, int irq)
>   die_perror("KVM_IRQ_LINE ioctl");
>  }
>  
> -int ioport__setup_arch(struct kvm *kvm)
> -{
> - return 0;
> -}
> -
>  bool kvm__arch_cpu_supports_vm(void)
>  {
>   return true;
> diff --git a/powerpc/ioport.c b/powerpc/ioport.c
> index 0c188b61..a5cff4ee 100644
> --- a/powerpc/ioport.c
> +++ b/powerpc/ioport.c
> @@ -12,12 +12,6 @@
>  
>  #include 
>  
> -int ioport__setup_arch(struct kvm *kvm)
> -{
> - /* PPC has no legacy ioports to set up */
> - return 0;
> -}
> -
>  void ioport__map_irq(u8 *irq)
>  {
>  }
> diff --git a/x86/ioport.c b/x86/ioport.c
> index 7ad7b8f3..a8d2bb1a 100644
> --- a/x86/ioport.c
> +++ b/x86/ioport.c
> @@ -69,7 +69,7 @@ void ioport__map_irq(u8 *irq)
>  {
>  }
>  
> -int ioport__setup_arch(struct kvm *kvm)
> +static int ioport__setup_arch(struct kvm *kvm)
>  {
>   int r;
>  
> @@ -150,3 +150,4 @@ int ioport__setup_arch(struct kvm *kvm)
>  
>   return 0;
>  }
> +dev_base_init(ioport__setup_arch);
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH kvmtool v2 00/22] Unify I/O port and MMIO trap handling

2021-03-02 Thread Alexandru Elisei
Hi Andre,

I've started to review this iteration and I've come across this error when 
trying
to apply the patches:

$ git am --reject
patches/unify-ioport-and-mmio/v2/Unify-I-O-port-and-MMIO-trap-handling.patch
Applying: ioport: Remove ioport__setup_arch()
Checking patch arm/ioport.c...
Checking patch include/kvm/ioport.h...
Checking patch ioport.c...
Checking patch mips/kvm.c...
Checking patch powerpc/ioport.c...
Checking patch x86/ioport.c...
Applied patch arm/ioport.c cleanly.
Applied patch include/kvm/ioport.h cleanly.
Applied patch ioport.c cleanly.
Applied patch mips/kvm.c cleanly.
Applied patch powerpc/ioport.c cleanly.
Applied patch x86/ioport.c cleanly.
Applying: hw/serial: Use device abstraction for FDT generator function
Checking patch hw/serial.c...
Checking patch include/kvm/kvm.h...
Applied patch hw/serial.c cleanly.
Applied patch include/kvm/kvm.h cleanly.
Applying: ioport: Retire .generate_fdt_node functionality
Checking patch include/kvm/ioport.h...
Checking patch ioport.c...
Applied patch include/kvm/ioport.h cleanly.
Applied patch ioport.c cleanly.
Applying: mmio: Extend handling to include ioport emulation
Checking patch include/kvm/kvm.h...
Checking patch ioport.c...
Checking patch mmio.c...
Applied patch include/kvm/kvm.h cleanly.
Applied patch ioport.c cleanly.
Applied patch mmio.c cleanly.
Applying: hw/i8042: Clean up data types
Checking patch hw/i8042.c...
Applied patch hw/i8042.c cleanly.
Applying: hw/i8042: Refactor trap handler
Checking patch hw/i8042.c...
Applied patch hw/i8042.c cleanly.
Applying: hw/i8042: Switch to new trap handlers
Checking patch hw/i8042.c...
error: while searching for:
        ioport__write8(data, value);
}

/*
 * Called when the OS has written to one of the keyboard's ports (0x60 or 0x64)
 */
static bool kbd_in(struct ioport *ioport, struct kvm_cpu *vcpu, u16 port, void
*data, int size)
{
    kbd_io(vcpu, port, data, size, false, NULL);

    return true;
}

static bool kbd_out(struct ioport *ioport, struct kvm_cpu *vcpu, u16 port, void
*data, int size)
{
    kbd_io(vcpu, port, data, size, true, NULL);

    return true;
}

static struct ioport_operations kbd_ops = {
    .io_in        = kbd_in,
    .io_out        = kbd_out,
};

int kbd__init(struct kvm *kvm)
{
    int r;

    kbd_reset();
    state.kvm = kvm;
    r = ioport__register(kvm, I8042_DATA_REG, &kbd_ops, 2, NULL);
    if (r < 0)
        return r;
    r = ioport__register(kvm, I8042_COMMAND_REG, &kbd_ops, 2, NULL);
    if (r < 0) {
        ioport__unregister(kvm, I8042_DATA_REG);
        return r;
    }


error: patch failed: hw/i8042.c:325
Checking patch include/kvm/i8042.h...
Applying patch hw/i8042.c with 1 reject...
Rejected hunk #1.
Applied patch include/kvm/i8042.h cleanly.
Patch failed at 0007 hw/i8042: Switch to new trap handlers
hint: Use 'git am --show-current-patch=diff' to see the failed patch
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".

where the patch file is from patchwork.kernel.org [1], created by clicking on
the "series" button on the top right. I'm not sure what is causing the error;
everything looks the same to me.

Regardless, I've applied the reject manually and everything looks ok.

[1]
https://patchwork.kernel.org/project/kvm/patch/20210225005915.26423-2-andre.przyw...@arm.com/

Thanks,

Alex

On 2/25/21 12:58 AM, Andre Przywara wrote:
> Compared to v1 this has a few fixes, as suggested by Alex.
> There is a new patch 20/22, which cleans up the ARM memory map
> definition and adds a chart to make it more obvious what is going on.
> For a changelog, see below.
>
> ==
>
> At the moment we use two separate code paths to handle exits for
> KVM_EXIT_IO (ioport.c) and KVM_EXIT_MMIO (mmio.c), even though they
> are semantically very similar. Because the trap handler callback routine
> is different, devices need to decide on one conduit or need to provide
> different handler functions for both of them.
>
> This is not only unnecessary code duplication, but makes switching
> devices from I/O port to MMIO a tedious task, even though there is no
> real difference between the two, especially on ARM and PowerPC.
>
> For ARM we aim at providing a flexible memory layout, and also have
> trouble with the UART and RTC device overlapping with the PCI I/O area,
> so it seems indicated to tackle this once and for all.
>
> The first three patches do some cleanup, to simplify things later.
>
> Patch 04/22 lays the groundwork, by extending mmio.c to be able to also
> register I/O port trap handlers, using the same callback prototype as
> we use for MMIO.
>
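For reference, the trap-handler shape that everything converges on is the one
mmio.c already uses; roughly the following (the exact declarations may differ
slightly from the tree, and the PIO registration name is approximate):

/* The single callback prototype used for MMIO, and now for port I/O too. */
typedef void (*mmio_handler_fn)(struct kvm_cpu *vcpu, u64 addr, u8 *data,
				u32 len, u8 is_write, void *ptr);

int kvm__register_mmio(struct kvm *kvm, u64 phys_addr, u64 phys_addr_len,
		       bool coalesce, mmio_handler_fn mmio_fn, void *ptr);

/* Added by this series: same handler type, registered on the PIO bus. */
int kvm__register_pio(struct kvm *kvm, u16 port, u16 len,
		      mmio_handler_fn mmio_fn, void *ptr);
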
> The next 14 patches then convert devices that use the I/O port
> interface over to the new joint interface. This requires reworking
> the trap handler routine to adhere to the same prototype as the existing
> MMIO handlers. For most devices this is done in two steps: a first

Re: [RFC PATCH 3/4] KVM: arm64: Install the block entry before unmapping the page mappings

2021-03-02 Thread Alexandru Elisei
Hello,

On 2/8/21 11:22 AM, Yanan Wang wrote:
> When KVM needs to coalesce the normal page mappings into a block mapping,
> we currently invalidate the old table entry first, followed by invalidation
> of the TLB, then unmap the page mappings, and install the block entry last.
> 
> It can take a long time to unmap the numerous page mappings, which means
> there will be a long period when the table entry can be observed as invalid.
> If other vCPUs access any guest page within the block range and find the
> table entry invalid, they will all exit from the guest with a translation
> fault, which is not necessary. And KVM will make efforts to handle these
> faults, especially when performing CMOs by block range.
>
> So let's quickly install the block entry at first to ensure uninterrupted
> memory access of the other vCPUs, and then unmap the page mappings after
> installation. This will reduce most of the time when the table entry is
> invalid, and avoid most of the unnecessary translation faults.

I'm not convinced I've fully understood what is going on yet, but it seems to me
that the idea is sound. Some questions and comments below.

>
> Signed-off-by: Yanan Wang 
> ---
>  arch/arm64/kvm/hyp/pgtable.c | 26 --
>  1 file changed, 12 insertions(+), 14 deletions(-)
>
> diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
> index 78a560446f80..308c36b9cd21 100644
> --- a/arch/arm64/kvm/hyp/pgtable.c
> +++ b/arch/arm64/kvm/hyp/pgtable.c
> @@ -434,6 +434,7 @@ struct stage2_map_data {
>   kvm_pte_t   attr;
>  
>   kvm_pte_t   *anchor;
> + kvm_pte_t   *follow;
>  
>   struct kvm_s2_mmu   *mmu;
>   struct kvm_mmu_memory_cache *memcache;
> @@ -553,15 +554,14 @@ static int stage2_map_walk_table_pre(u64 addr, u64 end, 
> u32 level,
>   if (!kvm_block_mapping_supported(addr, end, data->phys, level))
>   return 0;
>  
> - kvm_set_invalid_pte(ptep);
> -
>   /*
> -  * Invalidate the whole stage-2, as we may have numerous leaf
> -  * entries below us which would otherwise need invalidating
> -  * individually.
> +  * If we need to coalesce existing table entries into a block here,
> +  * then install the block entry first and the sub-level page mappings
> +  * will be unmapped later.
>*/
> - kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu);
>   data->anchor = ptep;
> + data->follow = kvm_pte_follow(*ptep);
> + stage2_coalesce_tables_into_block(addr, level, ptep, data);

Here's how stage2_coalesce_tables_into_block() is implemented in the previous
patch (it might be worth merging it with this patch; I found it impossible to
judge if the function is correct without seeing how it is used and what it is
replacing):

static void stage2_coalesce_tables_into_block(u64 addr, u32 level,
                      kvm_pte_t *ptep,
                      struct stage2_map_data *data)
{
    u64 granule = kvm_granule_size(level), phys = data->phys;
    kvm_pte_t new = kvm_init_valid_leaf_pte(phys, data->attr, level);

    kvm_set_invalid_pte(ptep);

    /*
     * Invalidate the whole stage-2, as we may have numerous leaf entries
     * below us which would otherwise need invalidating individually.
     */
    kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu);
    smp_store_release(ptep, new);
    data->phys += granule;
}

This works because __kvm_pgtable_visit() saves the *ptep value before calling 
the
pre callback, and it visits the next level table based on the initial pte value,
not the new value written by stage2_coalesce_tables_into_block().
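
Roughly, the relevant ordering in __kvm_pgtable_visit() is the following
(heavily simplified; visit_cb() stands in for the real visitor dispatch and
all error handling is omitted):

static int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
			       kvm_pte_t *ptep, u32 level)
{
	kvm_pte_t pte = *ptep;			/* snapshot taken here */
	bool table = kvm_pte_table(pte, level);

	if (table)
		visit_cb(data, ptep, level, KVM_PGTABLE_WALK_TABLE_PRE);

	if (!table) {
		visit_cb(data, ptep, level, KVM_PGTABLE_WALK_LEAF);
		data->addr += kvm_granule_size(level);
		return 0;
	}

	/* Descend using the snapshot, not whatever the callback wrote. */
	return __kvm_pgtable_walk(data, kvm_pte_follow(pte), level + 1);
}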

Assuming the first patch in the series is merged ("KVM: arm64: Move the clean of
dcache to the map handler"), this function is missing the CMOs from
stage2_map_walker_try_leaf(). I can think of the following situation where they
are needed:

1. The 2nd level (PMD) table that will be turned into a block is mapped at 
stage 2
because one of the pages in the 3rd level (PTE) table it points to is accessed 
by
the guest.

2. The kernel decides to turn the userspace mapping into a transparent huge page
and calls the mmu notifier to remove the mapping from stage 2. The 2nd level 
table
is still valid.

3. Guest accesses a page which is not the page it accessed at step 1, which 
causes
a translation fault. KVM decides we can use a PMD block mapping to map the 
address
and we end up in stage2_coalesce_tables_into_block(). We need CMOs in this case
because the guest accesses memory it didn't access before.

What do you think, is that a valid situation?
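
If that is a valid situation, the coalescing helper presumably needs the same
dcache maintenance; a rough sketch of what I mean, reusing stage2_pte_cacheable()
and stage2_flush_dcache() from pgtable.c (whether those remain the right helpers
after patch 1 of this series is an assumption on my part):

static void stage2_coalesce_tables_into_block(u64 addr, u32 level,
					      kvm_pte_t *ptep,
					      struct stage2_map_data *data)
{
	u64 granule = kvm_granule_size(level), phys = data->phys;
	kvm_pte_t new = kvm_init_valid_leaf_pte(phys, data->attr, level);

	kvm_set_invalid_pte(ptep);
	kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu);

	/* Clean the dcache for the whole block before it becomes reachable. */
	if (stage2_pte_cacheable(new))
		stage2_flush_dcache(kvm_pte_follow(new), granule);

	smp_store_release(ptep, new);
	data->phys += granule;
}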

>   return 0;
>  }
>  
> @@ -614,20 +614,18 @@ static int stage2_map_walk_table_post(u64 addr, u64 
> end, u32 level,
> kvm_pte_t *ptep,
> struct stage2_map_data *data)
>  {
> - int ret = 0;
> -
>   if (!data->anchor)
>   return 0;
>  
> - free_page((unsigned long

Re: [RFC PATCH] kvm: arm64: Try stage2 block mapping for host device MMIO

2021-03-02 Thread Keqian Zhu
Hi Marc,

Do you have further suggestions on this? Block mapping does bring obvious benefits.

Thanks,
Keqian

On 2021/1/25 19:25, Keqian Zhu wrote:
> Hi Marc,
> 
> On 2021/1/22 17:45, Marc Zyngier wrote:
>> On 2021-01-22 08:36, Keqian Zhu wrote:
>>> The MMIO region of a device may be huge (GB level), so try to use block
>>> mapping in stage2 to speed up both map and unmap.
>>>
>>> Especially for unmap, it performs a TLBI right after each invalidation
>>> of a PTE. If all mappings are of PAGE_SIZE, it takes much time to handle
>>> a GB level range.
>>
>> This is only on VM teardown, right? Or do you unmap the device more often?
>> Can you please quantify the speedup and the conditions this occurs in?
> 
> Yes, and there are some other paths (including what your patch series
> handles) that will do the unmap action:
> 
> 1. guest reboot without S2FWB: stage2_unmap_vm(), which only unmaps guest
> regular RAM.
> 2. userspace deletes a memslot: kvm_arch_flush_shadow_memslot().
> 3. rollback of device MMIO mapping: kvm_arch_prepare_memory_region().
> 4. rollback of dirty log tracking: if we enable hugepage for guest RAM, then
> after dirty log is stopped, the newly created block mappings will unmap all
> page mappings.
> 5. mmu notifier: kvm_unmap_hva_range(). AFAICS, we will use this path on VM
> teardown or when the guest resets pass-through devices. The bugfix[1] gives
> the reason for unmapping the MMIO region when the guest resets pass-through
> devices.
> 
> For unmap related to the MMIO region, which this patch addresses:
> point 1 does not apply.
> point 2 occurs when userspace unplugs pass-through devices.
> point 3 can occur, but rarely.
> point 4 does not apply.
> point 5 occurs on VM teardown or when the guest resets pass-through devices.
> 
> And I had a look at your patch series; it can solve:
> For VM teardown, elide CMOs and perform a VMALL invalidation instead of
> individual ones (but the current kernel does not go through this path on
> VM teardown).
> For rollback of dirty log tracking, elide CMOs.
> For kvm_unmap_hva_range, elide CMOs if the event is MMU_NOTIFY_UNMAP.
> 
> (But I doubt the CMOs in unmap. As we perform CMOs in user_mem_abort when
> installing a new stage2 mapping for the VM, maybe the CMO in unmap is
> unnecessary under all conditions :-) ?)
> 
> So it shows that we are solving different parts of unmap, so they are not
> conflicting. At least this patch can still speed up map of the device MMIO
> region, and speed up unmap of the device MMIO region even if we do not need
> to perform CMO and TLBI ;-).
> 
> speedup: unmap 8GB MMIO on FPGA.
> 
>          before         after opt
> cost     30+ minutes    949ms
> 
> Thanks,
> Keqian
> 
>>
>> I have the feeling that we are just circling around another problem,
>> which is that we could rely on a VM-wide TLBI when tearing down the
>> guest. I worked on something like that[1] a long while ago, and parked
>> it for some reason. Maybe it is worth reviving.
>>
>> [1] 
>> https://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git/log/?h=kvm-arm64/elide-cmo-tlbi
>>
>>>
>>> Signed-off-by: Keqian Zhu 
>>> ---
>>>  arch/arm64/include/asm/kvm_pgtable.h | 11 +++
>>>  arch/arm64/kvm/hyp/pgtable.c | 15 +++
>>>  arch/arm64/kvm/mmu.c | 12 
>>>  3 files changed, 34 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/arch/arm64/include/asm/kvm_pgtable.h
>>> b/arch/arm64/include/asm/kvm_pgtable.h
>>> index 52ab38db04c7..2266ac45f10c 100644
>>> --- a/arch/arm64/include/asm/kvm_pgtable.h
>>> +++ b/arch/arm64/include/asm/kvm_pgtable.h
>>> @@ -82,6 +82,17 @@ struct kvm_pgtable_walker {
>>>  const enum kvm_pgtable_walk_flagsflags;
>>>  };
>>>
>>> +/**
>>> + * kvm_supported_pgsize() - Get the max supported page size of a mapping.
>>> + * @pgt:Initialised page-table structure.
>>> + * @addr:Virtual address at which to place the mapping.
>>> + * @end:End virtual address of the mapping.
>>> + * @phys:Physical address of the memory to map.
>>> + *
>>> + * The smallest return value is PAGE_SIZE.
>>> + */
>>> +u64 kvm_supported_pgsize(struct kvm_pgtable *pgt, u64 addr, u64 end, u64 
>>> phys);
>>> +
>>>  /**
>>>   * kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table.
>>>   * @pgt:Uninitialised page-table structure to initialise.
>>> diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
>>> index bdf8e55ed308..ab11609b9b13 100644
>>> --- a/arch/arm64/kvm/hyp/pgtable.c
>>> +++ b/arch/arm64/kvm/hyp/pgtable.c
>>> @@ -81,6 +81,21 @@ static bool kvm_block_mapping_supported(u64 addr,
>>> u64 end, u64 phys, u32 level)
>>>  return IS_ALIGNED(addr, granule) && IS_ALIGNED(phys, granule);
>>>  }
>>>
>>> +u64 kvm_supported_pgsize(struct kvm_pgtable *pgt, u64 addr, u64 end, u64 
>>> phys)
>>> +{
>>> +u32 lvl;
>>> +u64 pgsize = PAGE_SIZE;
>>> +
>>> +for (lvl = pgt->start_level; lvl < KVM_PGTABLE_MAX_LEVELS; lvl++) {
>>> +if (kvm_block_mapping_

[PATCH] kvm: arm64: nvhe: Save the SPE context early

2021-03-02 Thread Suzuki K Poulose
The nVHE KVM hyp drains and disables the SPE buffer, before
entering the guest, as the EL1&0 translation regime
is going to be loaded with that of the guest.

But this operation is performed way too late, because:
  - The owning translation regime of the SPE buffer
    is transferred to EL2 (MDCR_EL2_E2PB == 0).
  - The guest Stage1 is loaded.

Thus the flush could be issued with host EL1 virtual addresses, but
with the EL2 translations in place instead of the host EL1 ones, when
writing out any cached data.

Fix this by moving the SPE buffer handling early enough.
The restore path is doing the right thing.

Fixes: 014c4c77aad7 ("KVM: arm64: Improve debug register save/restore flow")
Cc: sta...@vger.kernel.org
Cc: Christoffer Dall 
Cc: Marc Zyngier 
Cc: Will Deacon 
Cc: Catalin Marinas 
Cc: Mark Rutland 
Cc: Alexandru Elisei 
Reviewed-by: Alexandru Elisei 
Signed-off-by: Suzuki K Poulose 
---
 arch/arm64/include/asm/kvm_hyp.h   |  5 +
 arch/arm64/kvm/hyp/nvhe/debug-sr.c | 12 ++--
 arch/arm64/kvm/hyp/nvhe/switch.c   | 11 ++-
 3 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index c0450828378b..385bd7dd3d39 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -83,6 +83,11 @@ void sysreg_restore_guest_state_vhe(struct kvm_cpu_context 
*ctxt);
 void __debug_switch_to_guest(struct kvm_vcpu *vcpu);
 void __debug_switch_to_host(struct kvm_vcpu *vcpu);
 
+#ifdef __KVM_NVHE_HYPERVISOR__
+void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu);
+void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu);
+#endif
+
 void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
 void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
 
diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c 
b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
index 91a711aa8382..f401724f12ef 100644
--- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
@@ -58,16 +58,24 @@ static void __debug_restore_spe(u64 pmscr_el1)
write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1);
 }
 
-void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
+void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
 {
/* Disable and flush SPE data generation */
__debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1);
+}
+
+void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
+{
__debug_switch_to_guest_common(vcpu);
 }
 
-void __debug_switch_to_host(struct kvm_vcpu *vcpu)
+void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu)
 {
__debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1);
+}
+
+void __debug_switch_to_host(struct kvm_vcpu *vcpu)
+{
__debug_switch_to_host_common(vcpu);
 }
 
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index f3d0e9eca56c..59aa1045fdaf 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -192,6 +192,14 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
pmu_switch_needed = __pmu_switch_to_guest(host_ctxt);
 
__sysreg_save_state_nvhe(host_ctxt);
+   /*
+* We must flush and disable the SPE buffer for nVHE, as
+* the translation regime(EL1&0) is going to be loaded with
+* that of the guest. And we must do this before we change the
+* translation regime to EL2 (via MDCR_EL2_E2PB == 0) and
+* before we load guest Stage1.
+*/
+   __debug_save_host_buffers_nvhe(vcpu);
 
__adjust_pc(vcpu);
 
@@ -234,11 +242,12 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
__fpsimd_save_fpexc32(vcpu);
 
+   __debug_switch_to_host(vcpu);
/*
 * This must come after restoring the host sysregs, since a non-VHE
 * system may enable SPE here and make use of the TTBRs.
 */
-   __debug_switch_to_host(vcpu);
+   __debug_restore_host_buffers_nvhe(vcpu);
 
if (pmu_switch_needed)
__pmu_switch_to_host(host_ctxt);
-- 
2.24.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC PATCH 0/7] kvm: arm64: Implement SW/HW combined dirty log

2021-03-02 Thread Keqian Zhu
Hi everyone,

Any comments are welcome :).

Thanks,
Keqian

On 2021/1/26 20:44, Keqian Zhu wrote:
> The intention:
> 
> On the arm64 platform, we track the dirty log of vCPUs through guest memory
> aborts. KVM occupies some vCPU time of the guest to change the stage2 mapping
> and mark it dirty. This leads to heavy side effects on the VM, especially when
> multiple vCPUs race and some of them block on the kvm mmu_lock.
> 
> DBM is a HW auxiliary approach to log dirty state. The MMU changes a PTE to be
> writable if its DBM bit is set. Then KVM doesn't occupy vCPU time to log dirty
> state.
> 
> About this patch series:
> 
> The biggest problem of applying DBM for stage2 is that software must scan PTs
> to collect dirty state, which may cost much time and affect the downtime of
> migration.
> 
> This series realizes a SW/HW combined dirty log that can effectively solve this
> problem (the smmu side can also use this approach to solve dma dirty log
> tracking).
> 
> The core idea is that we do not enable hardware dirty tracking at start (we do
> not add the DBM bit). When an arbitrary PT takes a fault, we perform software
> tracking for this PT and enable hardware tracking for its *nearby* PTs (e.g.
> add the DBM bit for the nearby 16 PTs). Then when syncing the dirty log, we
> already know all PTs with hardware dirty tracking enabled, so we do not need
> to scan all PTs.
> 
>        mem abort point                     mem abort point
>               ↓                                   ↓
>  ------------------------------------------------------------
>  |      |      |      |      |      |      |      |      |
>  ------------------------------------------------------------
>               ↑                                   ↑
>        set DBM bit of                      set DBM bit of
>  this PT section (64PTEs)            this PT section (64PTEs)
> 
> We may worry that when the dirty rate is very high we still need to scan too
> many PTs. Our main concern is the VM stop time. With Qemu dirty rate
> throttling, the dirty memory is close to the VM stop threshold, so there are
> only a few PTs to scan after the VM stops.
> 
> It has the advantage of hardware tracking, which minimizes the side effect on
> vCPUs, and also the advantage of software tracking, which controls the vCPU
> dirty rate. Moreover, software tracking helps us to scan PTs at some fixed
> points, which greatly reduces scanning time. And the biggest benefit is that
> we can apply this solution to dma dirty tracking.
> 
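For illustration, the fault-path idea above could be sketched roughly as
follows; every function and field name below is a hypothetical stand-in,
not something taken from the series itself:

#define HWDBM_GROUP_PTES	64	/* PTEs armed around each faulting PTE */

/* Write-fault path: log this page in software, arm DBM on its neighbours. */
static void combined_log_on_fault(struct kvm *kvm, kvm_pte_t *ptep, gfn_t gfn)
{
	/* align down to the start of the surrounding 64-PTE group */
	kvm_pte_t *group = hwdbm_group_start(ptep);
	int i;

	mark_page_dirty(kvm, gfn);		/* software dirty log, as today */

	for (i = 0; i < HWDBM_GROUP_PTES; i++)
		hwdbm_arm_pte(&group[i]);	/* set DBM: HW makes the PTE
						 * writable on the next write */

	hwdbm_record_group(kvm, group);		/* remember what to scan at sync */
}

/* Dirty-log sync: scan only the recorded groups instead of every PT. */
static void combined_log_sync(struct kvm *kvm)
{
	kvm_pte_t *group;

	hwdbm_for_each_recorded_group(kvm, group)
		hwdbm_collect_dirty(kvm, group);	/* writable PTE => dirty page */
}
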
> Test:
> 
> Host: Kunpeng 920 with 128 CPUs and 512G RAM. Transparent Hugepage disabled
> (to ensure the test result is not affected by the dissolving of block page
> tables at the early stage of migration).
> VM:   16 CPUs, 16GB RAM. Runs 4 pairs of (redis_benchmark + redis_server).
> 
> Each case was run 5 times for the software dirty log and the SW/HW combined
> dirty log.
> 
> Test result:
> 
> Gained a 5%~7% improvement of redis QPS during VM migration.
> VM downtime is not fundamentally affected.
> About 56.7% of the DBM bits are effectively used.
> 
> Keqian Zhu (7):
>   arm64: cpufeature: Add API to report system support of HWDBM
>   kvm: arm64: Use atomic operation when update PTE
>   kvm: arm64: Add level_apply parameter for stage2_attr_walker
>   kvm: arm64: Add some HW_DBM related pgtable interfaces
>   kvm: arm64: Add some HW_DBM related mmu interfaces
>   kvm: arm64: Only write protect selected PTE
>   kvm: arm64: Start up SW/HW combined dirty log
> 
>  arch/arm64/include/asm/cpufeature.h  |  12 +++
>  arch/arm64/include/asm/kvm_host.h|   6 ++
>  arch/arm64/include/asm/kvm_mmu.h |   7 ++
>  arch/arm64/include/asm/kvm_pgtable.h |  45 ++
>  arch/arm64/kvm/arm.c | 125 ++
>  arch/arm64/kvm/hyp/pgtable.c | 130 ++-
>  arch/arm64/kvm/mmu.c |  47 +-
>  arch/arm64/kvm/reset.c   |   8 +-
>  8 files changed, 351 insertions(+), 29 deletions(-)
> 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v14 05/13] iommu/smmuv3: Implement attach/detach_pasid_table

2021-03-02 Thread Keqian Zhu
Hi Eric,

On 2021/2/24 4:56, Eric Auger wrote:
> On attach_pasid_table() we program STE S1 related info set
> by the guest into the actual physical STEs. At minimum
> we need to program the context descriptor GPA and compute
> whether the stage1 is translated/bypassed or aborted.
> 
> On detach, the stage 1 config is unset and the abort flag is
> unset.
> 
> Signed-off-by: Eric Auger 
> 
[...]

> +
> + /*
> +  * we currently support a single CD so s1fmt and s1dss
> +  * fields are also ignored
> +  */
> + if (cfg->pasid_bits)
> + goto out;
> +
> + smmu_domain->s1_cfg.cdcfg.cdtab_dma = cfg->base_ptr;
only the "cdtab_dma" field of "cdcfg" is set, we are not able to locate a 
specific cd using arm_smmu_get_cd_ptr().

Maybe we'd better use a specialized function to fill other fields of "cdcfg" or 
add a sanity check in arm_smmu_get_cd_ptr()
to prevent calling it under nested mode?

As now we just call arm_smmu_get_cd_ptr() during finalise_s1(), no problem 
found. Just a suggestion ;-)
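
A rough sketch of the kind of check I mean (untested and only illustrative;
it relies on the ARM_SMMU_DOMAIN_NESTED stage introduced by this series):

static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain, u32 ssid)
{
	/*
	 * In nested mode only s1_cfg.cdcfg.cdtab_dma is populated (it points
	 * at the guest-owned CD table), so there is no CPU-side CD table for
	 * us to index into here.
	 */
	if (WARN_ON(smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED))
		return NULL;

	/* ... existing lookup of the CD entry for @ssid ... */
}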

Thanks,
Keqian


> + smmu_domain->s1_cfg.set = true;
> + smmu_domain->abort = false;
> + break;
> + default:
> + goto out;
> + }
> + spin_lock_irqsave(&smmu_domain->devices_lock, flags);
> + list_for_each_entry(master, &smmu_domain->devices, domain_head)
> + arm_smmu_install_ste_for_dev(master);
> + spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
> + ret = 0;
> +out:
> + mutex_unlock(&smmu_domain->init_mutex);
> + return ret;
> +}
> +
> +static void arm_smmu_detach_pasid_table(struct iommu_domain *domain)
> +{
> + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> + struct arm_smmu_master *master;
> + unsigned long flags;
> +
> + mutex_lock(&smmu_domain->init_mutex);
> +
> + if (smmu_domain->stage != ARM_SMMU_DOMAIN_NESTED)
> + goto unlock;
> +
> + smmu_domain->s1_cfg.set = false;
> + smmu_domain->abort = false;
> +
> + spin_lock_irqsave(&smmu_domain->devices_lock, flags);
> + list_for_each_entry(master, &smmu_domain->devices, domain_head)
> + arm_smmu_install_ste_for_dev(master);
> + spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
> +
> +unlock:
> + mutex_unlock(&smmu_domain->init_mutex);
> +}
> +
>  static bool arm_smmu_dev_has_feature(struct device *dev,
>enum iommu_dev_features feat)
>  {
> @@ -2939,6 +3026,8 @@ static struct iommu_ops arm_smmu_ops = {
>   .of_xlate   = arm_smmu_of_xlate,
>   .get_resv_regions   = arm_smmu_get_resv_regions,
>   .put_resv_regions   = generic_iommu_put_resv_regions,
> + .attach_pasid_table = arm_smmu_attach_pasid_table,
> + .detach_pasid_table = arm_smmu_detach_pasid_table,
>   .dev_has_feat   = arm_smmu_dev_has_feature,
>   .dev_feat_enabled   = arm_smmu_dev_feature_enabled,
>   .dev_enable_feat= arm_smmu_dev_enable_feature,
> 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm