[PATCH] arm64/perf: Replace '0xf' instances with ID_AA64DFR0_PMUVER_IMP_DEF

2021-08-09 Thread Anshuman Khandual
ID_AA64DFR0_PMUVER_IMP_DEF, which indicates an implementation defined PMU, never
actually gets used although there are '0xf' instances scattered all around.
Just do the macro replacement to improve readability.

Cc: Catalin Marinas 
Cc: Will Deacon 
Cc: Mark Rutland 
Cc: Peter Zijlstra 
Cc: Marc Zyngier 
Cc: linux-perf-us...@vger.kernel.org
Cc: linux-arm-ker...@lists.infradead.org
Cc: kvmarm@lists.cs.columbia.edu
Cc: linux-ker...@vger.kernel.org
Signed-off-by: Anshuman Khandual 
---
This applies on v5.14-rc5

 arch/arm64/include/asm/cpufeature.h | 2 +-
 arch/arm64/kernel/perf_event.c      | 2 +-
 arch/arm64/kvm/perf.c               | 2 +-
 arch/arm64/kvm/pmu-emul.c           | 6 +++---
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/include/asm/cpufeature.h 
b/arch/arm64/include/asm/cpufeature.h
index 9bb9d11750d7..54474e76ad86 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -552,7 +552,7 @@ cpuid_feature_cap_perfmon_field(u64 features, int field, 
u64 cap)
u64 mask = GENMASK_ULL(field + 3, field);
 
/* Treat IMPLEMENTATION DEFINED functionality as unimplemented */
-   if (val == 0xf)
+   if (val == ID_AA64DFR0_PMUVER_IMP_DEF)
val = 0;
 
if (val > cap) {
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index d07788dad388..b4044469527e 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -1055,7 +1055,7 @@ static void __armv8pmu_probe_pmu(void *info)
dfr0 = read_sysreg(id_aa64dfr0_el1);
pmuver = cpuid_feature_extract_unsigned_field(dfr0,
ID_AA64DFR0_PMUVER_SHIFT);
-   if (pmuver == 0xf || pmuver == 0)
+   if (pmuver == ID_AA64DFR0_PMUVER_IMP_DEF || pmuver == 0)
return;
 
cpu_pmu->pmuver = pmuver;
diff --git a/arch/arm64/kvm/perf.c b/arch/arm64/kvm/perf.c
index 151c31fb9860..f9bb3b14130e 100644
--- a/arch/arm64/kvm/perf.c
+++ b/arch/arm64/kvm/perf.c
@@ -50,7 +50,7 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = {
 
 int kvm_perf_init(void)
 {
-   if (kvm_pmu_probe_pmuver() != 0xf && !is_protected_kvm_enabled())
+   if (kvm_pmu_probe_pmuver() != ID_AA64DFR0_PMUVER_IMP_DEF && !is_protected_kvm_enabled())
static_branch_enable(&kvm_arm_pmu_available);
 
return perf_register_guest_info_callbacks(&kvm_guest_cbs);
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index f33825c995cb..60f89bdbeebb 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -745,7 +745,7 @@ int kvm_pmu_probe_pmuver(void)
struct perf_event_attr attr = { };
struct perf_event *event;
struct arm_pmu *pmu;
-   int pmuver = 0xf;
+   int pmuver = ID_AA64DFR0_PMUVER_IMP_DEF;
 
/*
 * Create a dummy event that only counts user cycles. As we'll never
@@ -770,7 +770,7 @@ int kvm_pmu_probe_pmuver(void)
if (IS_ERR(event)) {
pr_err_once("kvm: pmu event creation failed %ld\n",
PTR_ERR(event));
-   return 0xf;
+   return ID_AA64DFR0_PMUVER_IMP_DEF;
}
 
if (event->pmu) {
@@ -923,7 +923,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct 
kvm_device_attr *attr)
if (!vcpu->kvm->arch.pmuver)
vcpu->kvm->arch.pmuver = kvm_pmu_probe_pmuver();
 
-   if (vcpu->kvm->arch.pmuver == 0xf)
+   if (vcpu->kvm->arch.pmuver == ID_AA64DFR0_PMUVER_IMP_DEF)
return -ENODEV;
 
switch (attr->attr) {
-- 
2.20.1



[PATCH] arm64/mm: Define ID_AA64MMFR0_TGRAN_2_SHIFT

2021-08-09 Thread Anshuman Khandual
Streamline the Stage-2 TGRAN value extraction from the ID_AA64MMFR0 register by
adding a page-size-agnostic ID_AA64MMFR0_TGRAN_2_SHIFT. This is similar to
the existing Stage-1 TGRAN shift, i.e. ID_AA64MMFR0_TGRAN_SHIFT.

Cc: Catalin Marinas 
Cc: Will Deacon 
Cc: Marc Zyngier 
Cc: linux-arm-ker...@lists.infradead.org
Cc: kvmarm@lists.cs.columbia.edu
Cc: linux-ker...@vger.kernel.org
Signed-off-by: Anshuman Khandual 
---
This applies on v5.14-rc5.

 arch/arm64/include/asm/sysreg.h |  3 +++
 arch/arm64/kvm/reset.c          | 17 ++---------------
 2 files changed, 5 insertions(+), 15 deletions(-)

diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 7b9c3acba684..943d31d92b5b 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -1028,14 +1028,17 @@
 
 #if defined(CONFIG_ARM64_4K_PAGES)
 #define ID_AA64MMFR0_TGRAN_SHIFT   ID_AA64MMFR0_TGRAN4_SHIFT
+#define ID_AA64MMFR0_TGRAN_2_SHIFT ID_AA64MMFR0_TGRAN4_2_SHIFT
 #define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN   ID_AA64MMFR0_TGRAN4_SUPPORTED
 #define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX   0x7
 #elif defined(CONFIG_ARM64_16K_PAGES)
 #define ID_AA64MMFR0_TGRAN_SHIFT   ID_AA64MMFR0_TGRAN16_SHIFT
+#define ID_AA64MMFR0_TGRAN_2_SHIFT ID_AA64MMFR0_TGRAN16_2_SHIFT
 #define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN   ID_AA64MMFR0_TGRAN16_SUPPORTED
 #define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX   0xF
 #elif defined(CONFIG_ARM64_64K_PAGES)
 #define ID_AA64MMFR0_TGRAN_SHIFT   ID_AA64MMFR0_TGRAN64_SHIFT
+#define ID_AA64MMFR0_TGRAN_2_SHIFT ID_AA64MMFR0_TGRAN64_2_SHIFT
 #define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN   ID_AA64MMFR0_TGRAN64_SUPPORTED
 #define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX   0x7
 #endif
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index cba7872d69a8..20588220fe66 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -311,7 +311,7 @@ u32 get_kvm_ipa_limit(void)
 
 int kvm_set_ipa_limit(void)
 {
-   unsigned int parange, tgran_2;
+   unsigned int parange;
u64 mmfr0;
 
mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
@@ -322,20 +322,7 @@ int kvm_set_ipa_limit(void)
 * Check with ARMv8.5-GTG that our PAGE_SIZE is supported at
 * Stage-2. If not, things will stop very quickly.
 */
-   switch (PAGE_SIZE) {
-   default:
-   case SZ_4K:
-   tgran_2 = ID_AA64MMFR0_TGRAN4_2_SHIFT;
-   break;
-   case SZ_16K:
-   tgran_2 = ID_AA64MMFR0_TGRAN16_2_SHIFT;
-   break;
-   case SZ_64K:
-   tgran_2 = ID_AA64MMFR0_TGRAN64_2_SHIFT;
-   break;
-   }
-
-   switch (cpuid_feature_extract_unsigned_field(mmfr0, tgran_2)) {
+   switch (cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_TGRAN_2_SHIFT)) {
case ID_AA64MMFR0_TGRAN_2_SUPPORTED_NONE:
kvm_err("PAGE_SIZE not supported at Stage-2, giving up\n");
return -EINVAL;
-- 
2.20.1



Re: [PATCH v6 00/21] KVM: Add idempotent controls for migrating system counter state

2021-08-09 Thread Oliver Upton
On Wed, Aug 4, 2021 at 3:03 PM Oliver Upton  wrote:
>
> On Wed, Aug 4, 2021 at 4:05 AM Oliver Upton  wrote:
> >
> > On Wed, Aug 4, 2021 at 1:58 AM Oliver Upton  wrote:
> > >
> > > KVM's current means of saving/restoring system counters is plagued with
> > > temporal issues. At least on ARM64 and x86, we migrate the guest's
> > > system counter by-value through the respective guest system register
> > > values (cntvct_el0, ia32_tsc). Restoring system counters by-value is
> > > brittle as the state is not idempotent: the host system counter is still
> > > oscillating between the attempted save and restore. Furthermore, VMMs
> > > may wish to transparently live migrate guest VMs, meaning that they
> > > include the elapsed time due to live migration blackout in the guest
> > > system counter view. The VMM thread could be preempted for any number of
> > > reasons (scheduler, L0 hypervisor under nested) between the time that
> > > it calculates the desired guest counter value and when KVM actually sets
> > > this counter state.
> > >
> > > Despite the value-based interface that we present to userspace, KVM
> > > actually has idempotent guest controls by way of system counter offsets.
> > > We can avoid all of the issues associated with a value-based interface
> > > by abstracting these offset controls in new ioctls. This series
> > > introduces new vCPU device attributes to provide userspace access to the
> > > vCPU's system counter offset.
> > >
> > > Patch 1 addresses a possible race in KVM_GET_CLOCK where
> > > use_master_clock is read outside of the pvclock_gtod_sync_lock.
> > >
> > > Patch 2 adopts Paolo's suggestion, augmenting the KVM_{GET,SET}_CLOCK
> > > ioctls to provide userspace with a (host_tsc, realtime) instant. This is
> > > essential for a VMM to perform precise migration of the guest's system
> > > counters.
> > >
> > > Patches 3-4 are some preparatory changes for exposing the TSC offset to
> > > userspace. Patch 5 provides a vCPU attribute to provide userspace access
> > > to the TSC offset.
> > >
> > > Patches 6-7 implement a test for the new additions to
> > > KVM_{GET,SET}_CLOCK.
> > >
> > > Patch 8 fixes some assertions in the kvm device attribute helpers.
> > >
> > > Patches 9-10 implement at test for the tsc offset attribute introduced in
> > > patch 5.

Paolo,

Is there anything else you're waiting to see for the x86 portion of
this series after addressing Sean's comments? There's some work
remaining on the arm64 side, though I believe the two architectures
are now disjoint for this series.

--
Thanks,
Oliver

> > > Patches 11-12 lay the groundwork for patch 13, which exposes CNTVOFF_EL2
> > > through the ONE_REG interface.
> > >
> > > Patches 14-15 add test cases for userspace manipulation of the virtual
> > > counter-timer.
> > >
> > > Patches 16-17 add a vCPU attribute to adjust the host-guest offset of an
> > > ARM vCPU, but only implements support for ECV hosts. Patches 18-19 add
> > > support for non-ECV hosts by emulating physical counter offsetting.
> > >
> > > Patch 20 adds test cases for adjusting the host-guest offset, and
> > > finally patch 21 adds a test to measure the emulation overhead of
> > > CNTPCT_EL2.
> > >
> > > This series was tested on both an Ampere Mt. Jade and Haswell systems.
> > > Unfortunately, the ECV portions of this series are untested, as there is
> > > no ECV-capable hardware and the ARM fast models only partially implement
> > > ECV.
> >
> > Small correction: I was only using the foundation model. Apparently
> > the AEM FVP provides full ECV support.
>
> Ok. I've now tested this series on the FVP Base RevC fast model@v8.6 +
> ECV=2. Passes on VHE, fails on nVHE.
>
> I'll respin this series with the fix for nVHE+ECV soon.
>
> --
> Thanks,
> Oliver
>
> >
> > >
> > > Physical counter benchmark
> > > --
> > >
> > > The following data was collected by running 1 iterations of the
> > > benchmark test from Patch 21 on an Ampere Mt. Jade reference server, a 2S
> > > machine with 2 80-core Ampere Altra SoCs. Measurements were collected
> > > for both VHE and nVHE operation using the `kvm-arm.mode=` command-line
> > > parameter.
> > >
> > > nVHE
> > > ----
> > >
> > > +--------------------+--------+---------+
> > > |       Metric       | Native | Trapped |
> > > +--------------------+--------+---------+
> > > | Average            | 54ns   | 148ns   |
> > > | Standard Deviation | 124ns  | 122ns   |
> > > | 95th Percentile    | 258ns  | 348ns   |
> > > +--------------------+--------+---------+
> > >
> > > VHE
> > > ---
> > >
> > > +--------------------+--------+---------+
> > > |       Metric       | Native | Trapped |
> > > +--------------------+--------+---------+
> > > | Average            | 53ns   | 152ns   |
> > > | Standard Deviation | 92ns   | 94ns    |
> > > | 95th Percentile    | 204ns  | 307ns   |
> > > +--------------------+--------+---------+
> > >
> > > This series applies cleanly to kvm/queue at the following commit:
> 
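For illustration only (not part of the series), a minimal standalone sketch of why an offset-based interface is idempotent while a value-based one is not; all names below are invented:

  #include <stdint.h>
  #include <stdio.h>

  static uint64_t host_counter;   /* stand-in for the free-running host counter */

  /* Value-based restore: the guest view ends up stale by however long the VMM was delayed. */
  static uint64_t restore_by_value(uint64_t saved_guest_value, uint64_t delay)
  {
          host_counter += delay;          /* the host counter kept ticking */
          return saved_guest_value;       /* guest view is now 'delay' ticks behind */
  }

  /* Offset-based restore: guest view = host counter + offset, whenever it is applied. */
  static uint64_t restore_by_offset(uint64_t offset, uint64_t delay)
  {
          host_counter += delay;
          return host_counter + offset;
  }

  int main(void)
  {
          host_counter = 1000;
          uint64_t guest_at_save = host_counter + 500;            /* guest runs 500 ticks ahead */
          uint64_t offset = guest_at_save - host_counter;         /* idempotent state: 500 */

          /* The VMM is preempted for 100 ticks between save and restore. */
          printf("by value : %llu\n", (unsigned long long)restore_by_value(guest_at_save, 100));
          printf("by offset: %llu\n", (unsigned long long)restore_by_offset(offset, 100));
          return 0;
  }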

[PATCH v4 17/21] KVM: arm64: Mark host bss and rodata section as shared

2021-08-09 Thread Quentin Perret
As the hypervisor maps the host's .bss and .rodata sections in its
stage-1, make sure to tag them as shared in hyp and host page-tables.

But since the hypervisor relies on the presence of these mappings, we
cannot leave the host in complete control of those memory regions -- it
must not, for example, unshare or donate them to another entity. To
prevent this, let's transfer the ownership of those ranges to the
hypervisor itself, and share the pages back with the host.

Signed-off-by: Quentin Perret 
---
 arch/arm64/kvm/hyp/nvhe/setup.c | 82 +
 1 file changed, 74 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 0b574d106519..57c27846320f 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -58,6 +58,7 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned 
long size,
 {
void *start, *end, *virt = hyp_phys_to_virt(phys);
unsigned long pgt_size = hyp_s1_pgtable_pages() << PAGE_SHIFT;
+   enum kvm_pgtable_prot prot;
int ret, i;
 
/* Recreate the hyp page-table using the early page allocator */
@@ -83,10 +84,6 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned 
long size,
if (ret)
return ret;
 
-   ret = pkvm_create_mappings(__start_rodata, __end_rodata, PAGE_HYP_RO);
-   if (ret)
-   return ret;
-
ret = pkvm_create_mappings(__hyp_rodata_start, __hyp_rodata_end, 
PAGE_HYP_RO);
if (ret)
return ret;
@@ -95,10 +92,6 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned 
long size,
if (ret)
return ret;
 
-   ret = pkvm_create_mappings(__hyp_bss_end, __bss_stop, PAGE_HYP_RO);
-   if (ret)
-   return ret;
-
ret = pkvm_create_mappings(virt, virt + size, PAGE_HYP);
if (ret)
return ret;
@@ -117,6 +110,24 @@ static int recreate_hyp_mappings(phys_addr_t phys, 
unsigned long size,
return ret;
}
 
+   /*
+* Map the host's .bss and .rodata sections RO in the hypervisor, but
+* transfer the ownership from the host to the hypervisor itself to
+* make sure it can't be donated or shared with another entity.
+*
+* The ownership transition requires matching changes in the host
+* stage-2. This will be done later (see finalize_host_mappings()) once
+* the hyp_vmemmap is addressable.
+*/
+   prot = pkvm_mkstate(PAGE_HYP_RO, PKVM_PAGE_SHARED_OWNED);
+   ret = pkvm_create_mappings(__start_rodata, __end_rodata, prot);
+   if (ret)
+   return ret;
+
+   ret = pkvm_create_mappings(__hyp_bss_end, __bss_stop, prot);
+   if (ret)
+   return ret;
+
return 0;
 }
 
@@ -148,6 +159,57 @@ static void hpool_put_page(void *addr)
hyp_put_page(&hpool, addr);
 }
 
+static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
+kvm_pte_t *ptep,
+enum kvm_pgtable_walk_flags flag,
+void * const arg)
+{
+   enum kvm_pgtable_prot prot;
+   enum pkvm_page_state state;
+   kvm_pte_t pte = *ptep;
+   phys_addr_t phys;
+
+   if (!kvm_pte_valid(pte))
+   return 0;
+
+   if (level != (KVM_PGTABLE_MAX_LEVELS - 1))
+   return -EINVAL;
+
+   phys = kvm_pte_to_phys(pte);
+   if (!addr_is_memory(phys))
+   return 0;
+
+   /*
+* Adjust the host stage-2 mappings to match the ownership attributes
+* configured in the hypervisor stage-1.
+*/
+   state = pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte));
+   switch (state) {
+   case PKVM_PAGE_OWNED:
+   return host_stage2_set_owner_locked(phys, PAGE_SIZE, pkvm_hyp_id);
+   case PKVM_PAGE_SHARED_OWNED:
+   prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_BORROWED);
+   break;
+   case PKVM_PAGE_SHARED_BORROWED:
+   prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_OWNED);
+   break;
+   default:
+   return -EINVAL;
+   }
+
+   return host_stage2_idmap_locked(phys, PAGE_SIZE, prot);
+}
+
+static int finalize_host_mappings(void)
+{
+   struct kvm_pgtable_walker walker = {
+   .cb = finalize_host_mappings_walker,
+   .flags  = KVM_PGTABLE_WALK_LEAF,
+   };
+
+   return kvm_pgtable_walk(&pkvm_pgtable, 0, BIT(pkvm_pgtable.ia_bits), &walker);
+}
+
 void __noreturn __pkvm_init_finalise(void)
 {
struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data);
@@ -167,6 +229,10 @@ void __noreturn __pkvm_init_finalise(void)
if (ret)
goto out;
 
+   ret = finalize_host_mappings();
+   if (ret)
+   goto out;
+

[PATCH v4 20/21] KVM: arm64: Restrict EL2 stage-1 changes in protected mode

2021-08-09 Thread Quentin Perret
The host kernel is currently able to change EL2 stage-1 mappings without
restrictions thanks to the __pkvm_create_mappings() hypercall. But in a
world where the host is no longer part of the TCB, this clearly poses a
problem.

To fix this, introduce a new hypercall to allow the host to share a
physical memory page with the hypervisor, and remove the
__pkvm_create_mappings() variant. The new hypercall implements
ownership and permission checks before allowing the sharing operation,
and it annotates the shared page in the hypervisor stage-1 and host
stage-2 page-tables.

Signed-off-by: Quentin Perret 
---
 arch/arm64/include/asm/kvm_asm.h  |  2 +-
 arch/arm64/kvm/hyp/include/nvhe/mem_protect.h |  1 +
 arch/arm64/kvm/hyp/nvhe/hyp-main.c| 11 +--
 arch/arm64/kvm/hyp/nvhe/mem_protect.c | 88 +++
 arch/arm64/kvm/mmu.c  | 28 +-
 5 files changed, 118 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 432a9ea1f02e..aed2aa61766a 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -59,7 +59,7 @@
 #define __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs  13
 #define __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs   14
 #define __KVM_HOST_SMCCC_FUNC___pkvm_init  15
-#define __KVM_HOST_SMCCC_FUNC___pkvm_create_mappings   16
+#define __KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp16
 #define __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping17
 #define __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector18
 #define __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize 19
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h 
b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index 0118527b07b0..03e604f842e2 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -49,6 +49,7 @@ extern struct host_kvm host_kvm;
 extern const u8 pkvm_hyp_id;
 
 int __pkvm_prot_finalize(void);
+int __pkvm_host_share_hyp(u64 pfn);
 
 bool addr_is_memory(phys_addr_t phys);
 int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot 
prot);
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c 
b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 7900d5b66ba3..2da6aa8da868 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -140,14 +140,11 @@ static void handle___pkvm_cpu_set_vector(struct 
kvm_cpu_context *host_ctxt)
cpu_reg(host_ctxt, 1) = pkvm_cpu_set_vector(slot);
 }
 
-static void handle___pkvm_create_mappings(struct kvm_cpu_context *host_ctxt)
+static void handle___pkvm_host_share_hyp(struct kvm_cpu_context *host_ctxt)
 {
-   DECLARE_REG(unsigned long, start, host_ctxt, 1);
-   DECLARE_REG(unsigned long, size, host_ctxt, 2);
-   DECLARE_REG(unsigned long, phys, host_ctxt, 3);
-   DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 4);
+   DECLARE_REG(u64, pfn, host_ctxt, 1);
 
-   cpu_reg(host_ctxt, 1) = __pkvm_create_mappings(start, size, phys, prot);
+   cpu_reg(host_ctxt, 1) = __pkvm_host_share_hyp(pfn);
 }
 
 static void handle___pkvm_create_private_mapping(struct kvm_cpu_context 
*host_ctxt)
@@ -185,7 +182,7 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__vgic_v3_restore_aprs),
HANDLE_FUNC(__pkvm_init),
HANDLE_FUNC(__pkvm_cpu_set_vector),
-   HANDLE_FUNC(__pkvm_create_mappings),
+   HANDLE_FUNC(__pkvm_host_share_hyp),
HANDLE_FUNC(__pkvm_create_private_mapping),
HANDLE_FUNC(__pkvm_prot_finalize),
 };
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c 
b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 2991dc6996b9..8165390d3ec9 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -339,6 +339,94 @@ static int host_stage2_idmap(u64 addr)
return ret;
 }
 
+static inline bool check_prot(enum kvm_pgtable_prot prot,
+ enum kvm_pgtable_prot required,
+ enum kvm_pgtable_prot denied)
+{
+   return (prot & (required | denied)) == required;
+}
+
+int __pkvm_host_share_hyp(u64 pfn)
+{
+   phys_addr_t addr = hyp_pfn_to_phys(pfn);
+   enum kvm_pgtable_prot prot, cur;
+   void *virt = __hyp_va(addr);
+   enum pkvm_page_state state;
+   kvm_pte_t pte;
+   int ret;
+
+   if (!addr_is_memory(addr))
+   return -EINVAL;
+
+   hyp_spin_lock(&host_kvm.lock);
+   hyp_spin_lock(&pkvm_pgd_lock);
+
+   ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, NULL);
+   if (ret)
+   goto unlock;
+   if (!pte)
+   goto map_shared;
+
+   /*
+* Check attributes in the host stage-2 PTE. We need the page to be:
+*  - mapped RWX as we're sharing memory;
+*  - not borrowed, as that implies absence of ownership.
+* Otherwise, we can't let it got 

[PATCH v4 18/21] KVM: arm64: Remove __pkvm_mark_hyp

2021-08-09 Thread Quentin Perret
Now that we mark memory owned by the hypervisor in the host stage-2
during __pkvm_init(), we no longer need to rely on the host to
explicitly mark the hyp sections later on.

Remove the __pkvm_mark_hyp() hypercall altogether.

Signed-off-by: Quentin Perret 
---
 arch/arm64/include/asm/kvm_asm.h  |  3 +-
 arch/arm64/kvm/arm.c  | 46 ---
 arch/arm64/kvm/hyp/include/nvhe/mem_protect.h |  1 -
 arch/arm64/kvm/hyp/nvhe/hyp-main.c|  9 
 arch/arm64/kvm/hyp/nvhe/mem_protect.c | 19 
 5 files changed, 1 insertion(+), 77 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 9f0bf2109be7..432a9ea1f02e 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -63,8 +63,7 @@
 #define __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping17
 #define __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector18
 #define __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize 19
-#define __KVM_HOST_SMCCC_FUNC___pkvm_mark_hyp  20
-#define __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc  21
+#define __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc  20
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index e9a2b8f27792..2f378482471b 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1954,57 +1954,11 @@ static void _kvm_host_prot_finalize(void *discard)
WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize));
 }
 
-static inline int pkvm_mark_hyp(phys_addr_t start, phys_addr_t end)
-{
-   return kvm_call_hyp_nvhe(__pkvm_mark_hyp, start, end);
-}
-
-#define pkvm_mark_hyp_section(__section)   \
-   pkvm_mark_hyp(__pa_symbol(__section##_start),   \
-   __pa_symbol(__section##_end))
-
 static int finalize_hyp_mode(void)
 {
-   int cpu, ret;
-
if (!is_protected_kvm_enabled())
return 0;
 
-   ret = pkvm_mark_hyp_section(__hyp_idmap_text);
-   if (ret)
-   return ret;
-
-   ret = pkvm_mark_hyp_section(__hyp_text);
-   if (ret)
-   return ret;
-
-   ret = pkvm_mark_hyp_section(__hyp_rodata);
-   if (ret)
-   return ret;
-
-   ret = pkvm_mark_hyp_section(__hyp_bss);
-   if (ret)
-   return ret;
-
-   ret = pkvm_mark_hyp(hyp_mem_base, hyp_mem_base + hyp_mem_size);
-   if (ret)
-   return ret;
-
-   for_each_possible_cpu(cpu) {
-   phys_addr_t start = virt_to_phys((void 
*)kvm_arm_hyp_percpu_base[cpu]);
-   phys_addr_t end = start + (PAGE_SIZE << nvhe_percpu_order());
-
-   ret = pkvm_mark_hyp(start, end);
-   if (ret)
-   return ret;
-
-   start = virt_to_phys((void *)per_cpu(kvm_arm_hyp_stack_page, 
cpu));
-   end = start + PAGE_SIZE;
-   ret = pkvm_mark_hyp(start, end);
-   if (ret)
-   return ret;
-   }
-
/*
 * Flip the static key upfront as that may no longer be possible
 * once the host stage 2 is installed.
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h 
b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index 49db0ec5a606..0118527b07b0 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -49,7 +49,6 @@ extern struct host_kvm host_kvm;
 extern const u8 pkvm_hyp_id;
 
 int __pkvm_prot_finalize(void);
-int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end);
 
 bool addr_is_memory(phys_addr_t phys);
 int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot 
prot);
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c 
b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 1632f001f4ed..7900d5b66ba3 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -163,14 +163,6 @@ static void handle___pkvm_prot_finalize(struct 
kvm_cpu_context *host_ctxt)
 {
cpu_reg(host_ctxt, 1) = __pkvm_prot_finalize();
 }
-
-static void handle___pkvm_mark_hyp(struct kvm_cpu_context *host_ctxt)
-{
-   DECLARE_REG(phys_addr_t, start, host_ctxt, 1);
-   DECLARE_REG(phys_addr_t, end, host_ctxt, 2);
-
-   cpu_reg(host_ctxt, 1) = __pkvm_mark_hyp(start, end);
-}
 typedef void (*hcall_t)(struct kvm_cpu_context *);
 
 #define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x
@@ -196,7 +188,6 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__pkvm_create_mappings),
HANDLE_FUNC(__pkvm_create_private_mapping),
HANDLE_FUNC(__pkvm_prot_finalize),
-   HANDLE_FUNC(__pkvm_mark_hyp),
 };
 
 static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c 
b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index cb023d31666e..2991dc6996b9 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ 

[PATCH v4 13/21] KVM: arm64: Expose host stage-2 manipulation helpers

2021-08-09 Thread Quentin Perret
We will need to manipulate the host stage-2 page-table from outside
mem_protect.c soon. Introduce two functions that allow this, and make
them available to users of mem_protect.h.

Signed-off-by: Quentin Perret 
---
 arch/arm64/kvm/hyp/include/nvhe/mem_protect.h |  2 ++
 arch/arm64/kvm/hyp/nvhe/mem_protect.c | 18 +-
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h 
b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index 87b1690c439f..0849ee8fa260 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -49,6 +49,8 @@ extern struct host_kvm host_kvm;
 int __pkvm_prot_finalize(void);
 int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end);
 
+int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot 
prot);
+int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id);
 int kvm_host_prepare_stage2(void *pgt_pool_base);
 void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt);
 
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c 
b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 6fed6772c673..f95a5a4aa09c 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -273,6 +273,22 @@ static int host_stage2_adjust_range(u64 addr, struct 
kvm_mem_range *range)
return 0;
 }
 
+int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
+enum kvm_pgtable_prot prot)
+{
+   hyp_assert_lock_held(&host_kvm.lock);
+
+   return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot);
+}
+
+int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
+{
+   hyp_assert_lock_held(&host_kvm.lock);
+
+   return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_kvm.pgt,
+  addr, size, &host_s2_pool, owner_id);
+}
+
 static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot 
prot)
 {
/*
@@ -309,7 +325,7 @@ static int host_stage2_idmap(u64 addr)
if (ret)
goto unlock;
 
-   ret = host_stage2_try(__host_stage2_idmap, range.start, range.end, prot);
+   ret = host_stage2_idmap_locked(range.start, range.end - range.start, prot);
 unlock:
hyp_spin_unlock(&host_kvm.lock);
 
-- 
2.32.0.605.g8dce9f2422-goog
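For illustration only (not from this patch), a sketch of how a caller outside mem_protect.c is expected to use the new _locked helpers: host_kvm.lock must be held around them. host_idmap_one_page_example() is a made-up name, and PKVM_HOST_MEM_PROT comes from an earlier patch in this series; the real external user appears later in the series when the host starts sharing pages with the hypervisor.

  /* Hypothetical caller: identity-map one page of memory in the host stage-2. */
  static int host_idmap_one_page_example(phys_addr_t phys)
  {
          int ret;

          hyp_spin_lock(&host_kvm.lock);
          /* _locked variant: the caller owns host_kvm.lock. */
          ret = host_stage2_idmap_locked(phys, PAGE_SIZE, PKVM_HOST_MEM_PROT);
          hyp_spin_unlock(&host_kvm.lock);

          return ret;
  }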



[PATCH v4 21/21] KVM: arm64: Make __pkvm_create_mappings static

2021-08-09 Thread Quentin Perret
The __pkvm_create_mappings() function is no longer used outside of
nvhe/mm.c, so make it static.

Signed-off-by: Quentin Perret 
---
 arch/arm64/kvm/hyp/include/nvhe/mm.h | 2 --
 arch/arm64/kvm/hyp/nvhe/mm.c | 4 ++--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h 
b/arch/arm64/kvm/hyp/include/nvhe/mm.h
index c76d7136ed9b..c9a8f535212e 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h
@@ -24,8 +24,6 @@ int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, 
phys_addr_t back);
 int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot);
 int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot);
 int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot 
prot);
-int __pkvm_create_mappings(unsigned long start, unsigned long size,
-  unsigned long phys, enum kvm_pgtable_prot prot);
 unsigned long __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
enum kvm_pgtable_prot prot);
 
diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
index 6fbe8e8030f6..2fabeceb889a 100644
--- a/arch/arm64/kvm/hyp/nvhe/mm.c
+++ b/arch/arm64/kvm/hyp/nvhe/mm.c
@@ -23,8 +23,8 @@ u64 __io_map_base;
 struct memblock_region hyp_memory[HYP_MEMBLOCK_REGIONS];
 unsigned int hyp_memblock_nr;
 
-int __pkvm_create_mappings(unsigned long start, unsigned long size,
- unsigned long phys, enum kvm_pgtable_prot prot)
+static int __pkvm_create_mappings(unsigned long start, unsigned long size,
+ unsigned long phys, enum kvm_pgtable_prot prot)
 {
int err;
 
-- 
2.32.0.605.g8dce9f2422-goog



[PATCH v4 19/21] KVM: arm64: Refactor protected nVHE stage-1 locking

2021-08-09 Thread Quentin Perret
Refactor the hypervisor stage-1 locking in nVHE protected mode to expose
a new pkvm_create_mappings_locked() function. This will be used in later
patches to allow walking and changing the hypervisor stage-1 without
releasing the lock.

Signed-off-by: Quentin Perret 
---
 arch/arm64/kvm/hyp/include/nvhe/mm.h |  1 +
 arch/arm64/kvm/hyp/nvhe/mm.c | 18 --
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h 
b/arch/arm64/kvm/hyp/include/nvhe/mm.h
index 8ec3a5a7744b..c76d7136ed9b 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h
@@ -23,6 +23,7 @@ int hyp_map_vectors(void);
 int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back);
 int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot);
 int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot);
+int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot 
prot);
 int __pkvm_create_mappings(unsigned long start, unsigned long size,
   unsigned long phys, enum kvm_pgtable_prot prot);
 unsigned long __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
index a8efdf0f9003..6fbe8e8030f6 100644
--- a/arch/arm64/kvm/hyp/nvhe/mm.c
+++ b/arch/arm64/kvm/hyp/nvhe/mm.c
@@ -67,13 +67,15 @@ unsigned long __pkvm_create_private_mapping(phys_addr_t 
phys, size_t size,
return addr;
 }
 
-int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
+int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot)
 {
unsigned long start = (unsigned long)from;
unsigned long end = (unsigned long)to;
unsigned long virt_addr;
phys_addr_t phys;
 
+   hyp_assert_lock_held(&pkvm_pgd_lock);
+
start = start & PAGE_MASK;
end = PAGE_ALIGN(end);
 
@@ -81,7 +83,8 @@ int pkvm_create_mappings(void *from, void *to, enum 
kvm_pgtable_prot prot)
int err;
 
phys = hyp_virt_to_phys((void *)virt_addr);
-   err = __pkvm_create_mappings(virt_addr, PAGE_SIZE, phys, prot);
+   err = kvm_pgtable_hyp_map(&pkvm_pgtable, virt_addr, PAGE_SIZE,
+ phys, prot);
if (err)
return err;
}
@@ -89,6 +92,17 @@ int pkvm_create_mappings(void *from, void *to, enum 
kvm_pgtable_prot prot)
return 0;
 }
 
+int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
+{
+   int ret;
+
+   hyp_spin_lock(&pkvm_pgd_lock);
+   ret = pkvm_create_mappings_locked(from, to, prot);
+   hyp_spin_unlock(&pkvm_pgd_lock);
+
+   return ret;
+}
+
 int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back)
 {
unsigned long start, end;
-- 
2.32.0.605.g8dce9f2422-goog



[PATCH v4 09/21] KVM: arm64: Tolerate re-creating hyp mappings to set software bits

2021-08-09 Thread Quentin Perret
The current hypervisor stage-1 mapping code doesn't allow changing an
existing valid mapping. Relax this condition by allowing changes that
only target software bits, as that will soon be needed to annotate shared
pages.

Reviewed-by: Fuad Tabba 
Signed-off-by: Quentin Perret 
---
 arch/arm64/kvm/hyp/pgtable.c | 18 --
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 1ee1168ac32d..2689fcb7901d 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -362,6 +362,21 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, 
kvm_pte_t *ptep)
return 0;
 }
 
+static bool hyp_pte_needs_update(kvm_pte_t old, kvm_pte_t new)
+{
+   /*
+* Tolerate KVM recreating the exact same mapping, or changing software
+* bits if the existing mapping was valid.
+*/
+   if (old == new)
+   return false;
+
+   if (!kvm_pte_valid(old))
+   return true;
+
+   return !WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW);
+}
+
 static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
kvm_pte_t *ptep, struct hyp_map_data *data)
 {
@@ -371,9 +386,8 @@ static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 
level,
if (!kvm_block_mapping_supported(addr, end, phys, level))
return false;
 
-   /* Tolerate KVM recreating the exact same mapping */
new = kvm_init_valid_leaf_pte(phys, data->attr, level);
-   if (old != new && !WARN_ON(kvm_pte_valid(old)))
+   if (hyp_pte_needs_update(old, new))
smp_store_release(ptep, new);
 
data->phys += granule;
-- 
2.32.0.605.g8dce9f2422-goog



[PATCH v4 10/21] KVM: arm64: Enable forcing page-level stage-2 mappings

2021-08-09 Thread Quentin Perret
Much of the stage-2 manipulation logic relies on being able to destroy
block mappings if e.g. installing a smaller mapping in the range. The
rationale for this behaviour is that stage-2 mappings can always be
re-created lazily. However, this gets more complicated when the stage-2
page-table is used to store metadata about the underlying pages. In such
cases, destroying a block mapping may lead to losing part of the state,
and confuse the user of those metadata (such as the hypervisor in nVHE
protected mode).

To avoid this, introduce a callback function in the pgtable struct which
is called during all map operations to determine whether the mappings
can use blocks, or should be forced to page granularity. This is used by
the hypervisor when creating the host stage-2 to force page-level
mappings when using non-default protection attributes.

Signed-off-by: Quentin Perret 
---
 arch/arm64/include/asm/kvm_pgtable.h  | 66 +--
 arch/arm64/kvm/hyp/nvhe/mem_protect.c | 34 --
 arch/arm64/kvm/hyp/pgtable.c  | 29 ++--
 3 files changed, 94 insertions(+), 35 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_pgtable.h 
b/arch/arm64/include/asm/kvm_pgtable.h
index 83c5c97d9eac..2c090b0eee77 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -115,25 +115,6 @@ enum kvm_pgtable_stage2_flags {
KVM_PGTABLE_S2_IDMAP= BIT(1),
 };
 
-/**
- * struct kvm_pgtable - KVM page-table.
- * @ia_bits:   Maximum input address size, in bits.
- * @start_level:   Level at which the page-table walk starts.
- * @pgd:   Pointer to the first top-level entry of the page-table.
- * @mm_ops:Memory management callbacks.
- * @mmu:   Stage-2 KVM MMU struct. Unused for stage-1 page-tables.
- */
-struct kvm_pgtable {
-   u32 ia_bits;
-   u32 start_level;
-   kvm_pte_t   *pgd;
-   struct kvm_pgtable_mm_ops   *mm_ops;
-
-   /* Stage-2 only */
-   struct kvm_s2_mmu   *mmu;
-   enum kvm_pgtable_stage2_flags   flags;
-};
-
 /**
  * enum kvm_pgtable_prot - Page-table permissions and attributes.
  * @KVM_PGTABLE_PROT_X:Execute permission.
@@ -149,11 +130,43 @@ enum kvm_pgtable_prot {
KVM_PGTABLE_PROT_DEVICE = BIT(3),
 };
 
-#define PAGE_HYP   (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W)
+#define KVM_PGTABLE_PROT_RW(KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W)
+#define KVM_PGTABLE_PROT_RWX   (KVM_PGTABLE_PROT_RW | KVM_PGTABLE_PROT_X)
+
+#define PKVM_HOST_MEM_PROT KVM_PGTABLE_PROT_RWX
+#define PKVM_HOST_MMIO_PROTKVM_PGTABLE_PROT_RW
+
+#define PAGE_HYP   KVM_PGTABLE_PROT_RW
 #define PAGE_HYP_EXEC  (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X)
 #define PAGE_HYP_RO(KVM_PGTABLE_PROT_R)
 #define PAGE_HYP_DEVICE(PAGE_HYP | KVM_PGTABLE_PROT_DEVICE)
 
+typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end,
+  enum kvm_pgtable_prot prot);
+
+/**
+ * struct kvm_pgtable - KVM page-table.
+ * @ia_bits:   Maximum input address size, in bits.
+ * @start_level:   Level at which the page-table walk starts.
+ * @pgd:   Pointer to the first top-level entry of the page-table.
+ * @mm_ops:Memory management callbacks.
+ * @mmu:   Stage-2 KVM MMU struct. Unused for stage-1 page-tables.
+ * @flags: Stage-2 page-table flags.
+ * @force_pte_cb:  Function that returns true if page level mappings must
+ * be used instead of block mappings.
+ */
+struct kvm_pgtable {
+   u32 ia_bits;
+   u32 start_level;
+   kvm_pte_t   *pgd;
+   struct kvm_pgtable_mm_ops   *mm_ops;
+
+   /* Stage-2 only */
+   struct kvm_s2_mmu   *mmu;
+   enum kvm_pgtable_stage2_flags   flags;
+   kvm_pgtable_force_pte_cb_t  force_pte_cb;
+};
+
 /**
  * enum kvm_pgtable_walk_flags - Flags to control a depth-first page-table 
walk.
  * @KVM_PGTABLE_WALK_LEAF: Visit leaf entries, including invalid
@@ -246,21 +259,24 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 
addr, u64 size, u64 phys,
 u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift);
 
 /**
- * kvm_pgtable_stage2_init_flags() - Initialise a guest stage-2 page-table.
+ * __kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table.
  * @pgt:   Uninitialised page-table structure to initialise.
  * @arch:  Arch-specific KVM structure representing the guest virtual
  * machine.
  * @mm_ops:Memory management callbacks.
  * @flags: Stage-2 configuration flags.
+ * 

[PATCH v4 15/21] KVM: arm64: Introduce addr_is_memory()

2021-08-09 Thread Quentin Perret
Introduce a helper usable in nVHE protected mode to check whether a
physical address is in a RAM region or not.

Signed-off-by: Quentin Perret 
---
 arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 1 +
 arch/arm64/kvm/hyp/nvhe/mem_protect.c | 7 +++
 2 files changed, 8 insertions(+)

diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h 
b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index 23316a021880..49db0ec5a606 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -51,6 +51,7 @@ extern const u8 pkvm_hyp_id;
 int __pkvm_prot_finalize(void);
 int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end);
 
+bool addr_is_memory(phys_addr_t phys);
 int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot 
prot);
 int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id);
 int kvm_host_prepare_stage2(void *pgt_pool_base);
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c 
b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index ee255171945c..cb023d31666e 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -197,6 +197,13 @@ static bool find_mem_range(phys_addr_t addr, struct 
kvm_mem_range *range)
return false;
 }
 
+bool addr_is_memory(phys_addr_t phys)
+{
+   struct kvm_mem_range range;
+
+   return find_mem_range(phys, &range);
+}
+
 static bool range_is_memory(u64 start, u64 end)
 {
struct kvm_mem_range r1, r2;
-- 
2.32.0.605.g8dce9f2422-goog



[PATCH v4 08/21] KVM: arm64: Don't overwrite software bits with owner id

2021-08-09 Thread Quentin Perret
We will soon start annotating page-tables with new flags to track shared
pages and such, and we will do so in valid mappings using software bits
in the PTEs, as provided by the architecture. However, it is possible
that we will need to use those flags to annotate invalid mappings as
well in the future, similar to what we do to track page ownership in the
host stage-2.

In order to facilitate the annotation of invalid mappings with such
flags, it would be preferable to re-use the same bits as for valid
mappings (bits [58-55]), but these are currently used for ownership
encoding. Since we have plenty of bits left to use in invalid
mappings, move the ownership bits further down the PTE to avoid the
conflict.

Reviewed-by: Fuad Tabba 
Signed-off-by: Quentin Perret 
---
 arch/arm64/kvm/hyp/pgtable.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 59a394d82de3..1ee1168ac32d 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -46,7 +46,7 @@
 KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
 KVM_PTE_LEAF_ATTR_HI_S2_XN)
 
-#define KVM_INVALID_PTE_OWNER_MASK GENMASK(63, 56)
+#define KVM_INVALID_PTE_OWNER_MASK GENMASK(9, 2)
 #define KVM_MAX_OWNER_ID   1
 
 struct kvm_pgtable_walk_data {
-- 
2.32.0.605.g8dce9f2422-goog
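For illustration only: with the mask moved down to bits [9:2], an invalid PTE carrying an owner id leaves the software bits [58:55] free for the annotations introduced later in the series. The sketch below (make_owner_pte() is a made-up name) shows roughly how pgtable.c encodes an owner id with this mask:

  static kvm_pte_t make_owner_pte(u8 owner_id)
  {
          /* The owner id lives in the low bits of an invalid (non-present) PTE. */
          return FIELD_PREP(KVM_INVALID_PTE_OWNER_MASK, owner_id);
  }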



[PATCH v4 04/21] KVM: arm64: Introduce helper to retrieve a PTE and its level

2021-08-09 Thread Quentin Perret
From: Marc Zyngier 

It is becoming a common need to fetch the PTE for a given address
together with its level. Add such a helper.

Signed-off-by: Marc Zyngier 
Signed-off-by: Quentin Perret 
---
 arch/arm64/include/asm/kvm_pgtable.h | 19 ++
 arch/arm64/kvm/hyp/pgtable.c | 39 
 2 files changed, 58 insertions(+)

diff --git a/arch/arm64/include/asm/kvm_pgtable.h 
b/arch/arm64/include/asm/kvm_pgtable.h
index f004c0115d89..082b9d65f40b 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -432,6 +432,25 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 
addr, u64 size);
 int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
 struct kvm_pgtable_walker *walker);
 
+/**
+ * kvm_pgtable_get_leaf() - Walk a page-table and retrieve the leaf entry
+ * with its level.
+ * @pgt:   Page-table structure initialised by kvm_pgtable_*_init().
+ * @addr:  Input address for the start of the walk.
+ * @ptep:  Pointer to storage for the retrieved PTE.
+ * @level: Pointer to storage for the level of the retrieved PTE.
+ *
+ * The offset of @addr within a page is ignored.
+ *
+ * The walker will walk the page-table entries corresponding to the input
+ * address specified, retrieving the leaf corresponding to this address.
+ * Invalid entries are treated as leaf entries.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
+kvm_pte_t *ptep, u32 *level);
+
 /**
  * kvm_pgtable_stage2_find_range() - Find a range of Intermediate Physical
  *  Addresses with compatible permission
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 05321f4165e3..78f36bd5df6c 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -326,6 +326,45 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, 
u64 size,
return _kvm_pgtable_walk(_data);
 }
 
+struct leaf_walk_data {
+   kvm_pte_t   pte;
+   u32 level;
+};
+
+static int leaf_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
+  enum kvm_pgtable_walk_flags flag, void * const arg)
+{
+   struct leaf_walk_data *data = arg;
+
+   data->pte   = *ptep;
+   data->level = level;
+
+   return 0;
+}
+
+int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
+kvm_pte_t *ptep, u32 *level)
+{
+   struct leaf_walk_data data;
+   struct kvm_pgtable_walker walker = {
+   .cb = leaf_walker,
+   .flags  = KVM_PGTABLE_WALK_LEAF,
+   .arg= &data,
+   };
+   int ret;
+
+   ret = kvm_pgtable_walk(pgt, ALIGN_DOWN(addr, PAGE_SIZE),
+  PAGE_SIZE, &walker);
+   if (!ret) {
+   if (ptep)
+   *ptep  = data.pte;
+   if (level)
+   *level = data.level;
+   }
+
+   return ret;
+}
+
 struct hyp_map_data {
u64 phys;
kvm_pte_t   attr;
-- 
2.32.0.605.g8dce9f2422-goog
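For illustration only (not part of the patch), a hypothetical caller of the new helper that checks whether an address is backed by a valid leaf entry; kvm_pte_valid() is assumed to be visible to the caller (a later patch in this series exposes it in the header):

  static bool addr_has_valid_leaf(struct kvm_pgtable *pgt, u64 addr)
  {
          kvm_pte_t pte;
          u32 level;

          /* A failed walk means we cannot say anything about the mapping. */
          if (kvm_pgtable_get_leaf(pgt, addr, &pte, &level))
                  return false;

          return kvm_pte_valid(pte);
  }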



[PATCH v4 05/21] KVM: arm64: Expose page-table helpers

2021-08-09 Thread Quentin Perret
The KVM pgtable API exposes the kvm_pgtable_walk() function to allow
the definition of walkers outside of pgtable.c. However, it is not easy
to implement any of those walkers without some of the low-level helpers.
Move some of them to the header file to allow re-use from other places.

Signed-off-by: Quentin Perret 
---
 arch/arm64/include/asm/kvm_pgtable.h | 40 
 arch/arm64/kvm/hyp/pgtable.c | 39 ---
 2 files changed, 40 insertions(+), 39 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_pgtable.h 
b/arch/arm64/include/asm/kvm_pgtable.h
index 082b9d65f40b..6938eac72c1f 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -25,6 +25,46 @@ static inline u64 kvm_get_parange(u64 mmfr0)
 
 typedef u64 kvm_pte_t;
 
+#define KVM_PTE_VALID  BIT(0)
+
+#define KVM_PTE_ADDR_MASK  GENMASK(47, PAGE_SHIFT)
+#define KVM_PTE_ADDR_51_48 GENMASK(15, 12)
+
+static inline bool kvm_pte_valid(kvm_pte_t pte)
+{
+   return pte & KVM_PTE_VALID;
+}
+
+static inline u64 kvm_pte_to_phys(kvm_pte_t pte)
+{
+   u64 pa = pte & KVM_PTE_ADDR_MASK;
+
+   if (PAGE_SHIFT == 16)
+   pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48;
+
+   return pa;
+}
+
+static inline u64 kvm_granule_shift(u32 level)
+{
+   /* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */
+   return ARM64_HW_PGTABLE_LEVEL_SHIFT(level);
+}
+
+static inline u64 kvm_granule_size(u32 level)
+{
+   return BIT(kvm_granule_shift(level));
+}
+
+static inline bool kvm_level_supports_block_mapping(u32 level)
+{
+   /*
+* Reject invalid block mappings and don't bother with 4TB mappings for
+* 52-bit PAs.
+*/
+   return !(level == 0 || (PAGE_SIZE != SZ_4K && level == 1));
+}
+
 /**
  * struct kvm_pgtable_mm_ops - Memory management callbacks.
  * @zalloc_page:   Allocate a single zeroed memory page.
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 78f36bd5df6c..49d768b92997 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -11,16 +11,12 @@
 #include 
 #include 
 
-#define KVM_PTE_VALID  BIT(0)
 
 #define KVM_PTE_TYPE   BIT(1)
 #define KVM_PTE_TYPE_BLOCK 0
 #define KVM_PTE_TYPE_PAGE  1
 #define KVM_PTE_TYPE_TABLE 1
 
-#define KVM_PTE_ADDR_MASK  GENMASK(47, PAGE_SHIFT)
-#define KVM_PTE_ADDR_51_48 GENMASK(15, 12)
-
 #define KVM_PTE_LEAF_ATTR_LO   GENMASK(11, 2)
 
 #define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDXGENMASK(4, 2)
@@ -61,17 +57,6 @@ struct kvm_pgtable_walk_data {
u64 end;
 };
 
-static u64 kvm_granule_shift(u32 level)
-{
-   /* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */
-   return ARM64_HW_PGTABLE_LEVEL_SHIFT(level);
-}
-
-static u64 kvm_granule_size(u32 level)
-{
-   return BIT(kvm_granule_shift(level));
-}
-
 #define KVM_PHYS_INVALID (-1ULL)
 
 static bool kvm_phys_is_valid(u64 phys)
@@ -79,15 +64,6 @@ static bool kvm_phys_is_valid(u64 phys)
return phys < 
BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_PARANGE_MAX));
 }
 
-static bool kvm_level_supports_block_mapping(u32 level)
-{
-   /*
-* Reject invalid block mappings and don't bother with 4TB mappings for
-* 52-bit PAs.
-*/
-   return !(level == 0 || (PAGE_SIZE != SZ_4K && level == 1));
-}
-
 static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level)
 {
u64 granule = kvm_granule_size(level);
@@ -135,11 +111,6 @@ static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
return __kvm_pgd_page_idx(&pgt, -1ULL) + 1;
 }
 
-static bool kvm_pte_valid(kvm_pte_t pte)
-{
-   return pte & KVM_PTE_VALID;
-}
-
 static bool kvm_pte_table(kvm_pte_t pte, u32 level)
 {
if (level == KVM_PGTABLE_MAX_LEVELS - 1)
@@ -151,16 +122,6 @@ static bool kvm_pte_table(kvm_pte_t pte, u32 level)
return FIELD_GET(KVM_PTE_TYPE, pte) == KVM_PTE_TYPE_TABLE;
 }
 
-static u64 kvm_pte_to_phys(kvm_pte_t pte)
-{
-   u64 pa = pte & KVM_PTE_ADDR_MASK;
-
-   if (PAGE_SHIFT == 16)
-   pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48;
-
-   return pa;
-}
-
 static kvm_pte_t kvm_phys_to_pte(u64 pa)
 {
kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK;
-- 
2.32.0.605.g8dce9f2422-goog



[PATCH v4 16/21] KVM: arm64: Enable retrieving protections attributes of PTEs

2021-08-09 Thread Quentin Perret
Introduce helper functions in the KVM stage-2 and stage-1 page-table
manipulation library that retrieve the enum kvm_pgtable_prot of a
PTE. This will be useful for implementing custom walkers outside of
pgtable.c.

Signed-off-by: Quentin Perret 
---
 arch/arm64/include/asm/kvm_pgtable.h | 20 +++
 arch/arm64/kvm/hyp/pgtable.c | 37 
 2 files changed, 57 insertions(+)

diff --git a/arch/arm64/include/asm/kvm_pgtable.h 
b/arch/arm64/include/asm/kvm_pgtable.h
index ff9d52f8073a..f1651e0153ad 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -506,4 +506,24 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, 
u64 size,
  */
 int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
 kvm_pte_t *ptep, u32 *level);
+
+/**
+ * kvm_pgtable_stage2_pte_prot() - Retrieve the protection attributes of a
+ *stage-2 Page-Table Entry.
+ * @pte:   Page-table entry
+ *
+ * Return: protection attributes of the page-table entry in the enum
+ *kvm_pgtable_prot format.
+ */
+enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte);
+
+/**
+ * kvm_pgtable_hyp_pte_prot() - Retrieve the protection attributes of a stage-1
+ * Page-Table Entry.
+ * @pte:   Page-table entry
+ *
+ * Return: protection attributes of the page-table entry in the enum
+ *kvm_pgtable_prot format.
+ */
+enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte);
 #endif /* __ARM64_KVM_PGTABLE_H__ */
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index cff744136044..f8ceebe4982e 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -363,6 +363,26 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, 
kvm_pte_t *ptep)
return 0;
 }
 
+enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte)
+{
+   enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW;
+   u32 ap;
+
+   if (!kvm_pte_valid(pte))
+   return prot;
+
+   if (!(pte & KVM_PTE_LEAF_ATTR_HI_S1_XN))
+   prot |= KVM_PGTABLE_PROT_X;
+
+   ap = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S1_AP, pte);
+   if (ap == KVM_PTE_LEAF_ATTR_LO_S1_AP_RO)
+   prot |= KVM_PGTABLE_PROT_R;
+   else if (ap == KVM_PTE_LEAF_ATTR_LO_S1_AP_RW)
+   prot |= KVM_PGTABLE_PROT_RW;
+
+   return prot;
+}
+
 static bool hyp_pte_needs_update(kvm_pte_t old, kvm_pte_t new)
 {
/*
@@ -565,6 +585,23 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, 
enum kvm_pgtable_prot p
return 0;
 }
 
+enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte)
+{
+   enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW;
+
+   if (!kvm_pte_valid(pte))
+   return prot;
+
+   if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R)
+   prot |= KVM_PGTABLE_PROT_R;
+   if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W)
+   prot |= KVM_PGTABLE_PROT_W;
+   if (!(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN))
+   prot |= KVM_PGTABLE_PROT_X;
+
+   return prot;
+}
+
 static bool stage2_pte_needs_update(kvm_pte_t old, kvm_pte_t new)
 {
if (!kvm_pte_valid(old) || !kvm_pte_valid(new))
-- 
2.32.0.605.g8dce9f2422-goog
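For illustration only (not part of the patch), a small helper built on the new accessor; stage2_pte_is_writable() is a made-up name. Patch 17 in this series uses the same pattern, feeding kvm_pgtable_hyp_pte_prot() into pkvm_getstate() to recover the page-state software bits:

  static bool stage2_pte_is_writable(kvm_pte_t pte)
  {
          /* Invalid PTEs carry no permissions. */
          return kvm_pte_valid(pte) &&
                 (kvm_pgtable_stage2_pte_prot(pte) & KVM_PGTABLE_PROT_W);
  }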



[PATCH v4 01/21] KVM: arm64: Add hyp_spin_is_locked() for basic locking assertions at EL2

2021-08-09 Thread Quentin Perret
From: Will Deacon 

Introduce hyp_spin_is_locked() so that functions can easily assert that
a given lock is held (albeit possibly by another CPU!) without having to
drag full lockdep support up to EL2.

Signed-off-by: Will Deacon 
Signed-off-by: Quentin Perret 
---
 arch/arm64/kvm/hyp/include/nvhe/spinlock.h | 8 
 1 file changed, 8 insertions(+)

diff --git a/arch/arm64/kvm/hyp/include/nvhe/spinlock.h 
b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h
index 76b537f8d1c6..04f65b655fcf 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/spinlock.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h
@@ -15,6 +15,7 @@
 
 #include 
 #include 
+#include 
 
 typedef union hyp_spinlock {
u32 __val;
@@ -89,4 +90,11 @@ static inline void hyp_spin_unlock(hyp_spinlock_t *lock)
: "memory");
 }
 
+static inline bool hyp_spin_is_locked(hyp_spinlock_t *lock)
+{
+   hyp_spinlock_t lockval = READ_ONCE(*lock);
+
+   return lockval.owner != lockval.next;
+}
+
 #endif /* __ARM64_KVM_NVHE_SPINLOCK_H__ */
-- 
2.32.0.605.g8dce9f2422-goog
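For illustration only, a sketch of the kind of assertion this primitive enables. The hyp_assert_lock_held() used elsewhere in this series is added by a separate patch and may differ; the CONFIG_NVHE_EL2_DEBUG gating shown here is an assumption:

  static inline void hyp_assert_lock_held(hyp_spinlock_t *lock)
  {
          /* Only pay for the check when EL2 debug checks are compiled in. */
          if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG))
                  BUG_ON(!hyp_spin_is_locked(lock));
  }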



[PATCH v4 14/21] KVM: arm64: Expose pkvm_hyp_id

2021-08-09 Thread Quentin Perret
Allow references to the hypervisor's owner id from outside
mem_protect.c.

Signed-off-by: Quentin Perret 
---
 arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 2 ++
 arch/arm64/kvm/hyp/nvhe/mem_protect.c | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h 
b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index 0849ee8fa260..23316a021880 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -46,6 +46,8 @@ struct host_kvm {
 };
 extern struct host_kvm host_kvm;
 
+extern const u8 pkvm_hyp_id;
+
 int __pkvm_prot_finalize(void);
 int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end);
 
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c 
b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index f95a5a4aa09c..ee255171945c 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -31,7 +31,7 @@ static struct hyp_pool host_s2_pool;
 u64 id_aa64mmfr0_el1_sys_val;
 u64 id_aa64mmfr1_el1_sys_val;
 
-static const u8 pkvm_hyp_id = 1;
+const u8 pkvm_hyp_id = 1;
 
 static void *host_s2_zalloc_pages_exact(size_t size)
 {
-- 
2.32.0.605.g8dce9f2422-goog



[PATCH v4 03/21] KVM: arm64: Provide the host_stage2_try() helper macro

2021-08-09 Thread Quentin Perret
We currently unmap all MMIO mappings from the host stage-2 to recycle
the pages whenever we run out. In order to make this pattern easy to
re-use from other places, factor the logic out into a dedicated macro.
While at it, apply the macro for the kvm_pgtable_stage2_set_owner()
calls. They're currently only called early on and are guaranteed to
succeed, but making them robust to the -ENOMEM case doesn't hurt and
will avoid painful debugging sessions later on.

Reviewed-by: Fuad Tabba 
Signed-off-by: Quentin Perret 
---
 arch/arm64/kvm/hyp/nvhe/mem_protect.c | 40 +++
 1 file changed, 22 insertions(+), 18 deletions(-)

diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c 
b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index d938ce95d3bd..74280a753efb 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -208,6 +208,25 @@ static inline int __host_stage2_idmap(u64 start, u64 end,
prot, &host_s2_pool);
 }
 
+/*
+ * The pool has been provided with enough pages to cover all of memory with
+ * page granularity, but it is difficult to know how much of the MMIO range
+ * we will need to cover upfront, so we may need to 'recycle' the pages if we
+ * run out.
+ */
+#define host_stage2_try(fn, ...)   \
+   ({  \
+   int __ret;  \
+   hyp_assert_lock_held(&host_kvm.lock);   \
+   __ret = fn(__VA_ARGS__);\
+   if (__ret == -ENOMEM) { \
+   __ret = host_stage2_unmap_dev_all();\
+   if (!__ret) \
+   __ret = fn(__VA_ARGS__);\
+   }   \
+   __ret;  \
+})
+
 static int host_stage2_idmap(u64 addr)
 {
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W;
@@ -223,22 +242,7 @@ static int host_stage2_idmap(u64 addr)
if (ret)
goto unlock;
 
-   ret = __host_stage2_idmap(range.start, range.end, prot);
-   if (ret != -ENOMEM)
-   goto unlock;
-
-   /*
-* The pool has been provided with enough pages to cover all of memory
-* with page granularity, but it is difficult to know how much of the
-* MMIO range we will need to cover upfront, so we may need to 'recycle'
-* the pages if we run out.
-*/
-   ret = host_stage2_unmap_dev_all();
-   if (ret)
-   goto unlock;
-
-   ret = __host_stage2_idmap(range.start, range.end, prot);
-
+   ret = host_stage2_try(__host_stage2_idmap, range.start, range.end, prot);
 unlock:
hyp_spin_unlock(&host_kvm.lock);
 
@@ -257,8 +261,8 @@ int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end)
return -EINVAL;
 
hyp_spin_lock(&host_kvm.lock);
-   ret = kvm_pgtable_stage2_set_owner(&host_kvm.pgt, start, end - start,
-  &host_s2_pool, pkvm_hyp_id);
+   ret = host_stage2_try(kvm_pgtable_stage2_set_owner, &host_kvm.pgt,
+ start, end - start, &host_s2_pool, pkvm_hyp_id);
hyp_spin_unlock(&host_kvm.lock);
 
return ret != -EAGAIN ? ret : 0;
-- 
2.32.0.605.g8dce9f2422-goog

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH v4 11/21] KVM: arm64: Allow populating software bits

2021-08-09 Thread Quentin Perret
Introduce infrastructure for manipulating software bits in stage-1 and
stage-2 page-tables using additional entries in the kvm_pgtable_prot
enum.

This is heavily inspired by Marc's implementation of a similar feature
in the NV patch series, but adapted to allow stage-1 changes as well:

  https://lore.kernel.org/kvmarm/20210510165920.1913477-56-...@kernel.org/
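
As an illustration (hypothetical call site, not part of this patch), a
caller can then OR a software bit into the prot argument of an existing
map call and have it written straight into the leaf entry:

    /* Sketch: annotate a stage-2 mapping with software bit 0. 'mc' is
     * the usual page-table memory cache argument. */
    enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_RW | KVM_PGTABLE_PROT_SW0;

    ret = kvm_pgtable_stage2_map(pgt, addr, PAGE_SIZE, phys, prot, mc);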

Suggested-by: Marc Zyngier 
Signed-off-by: Quentin Perret 
---
 arch/arm64/include/asm/kvm_pgtable.h | 12 +++-
 arch/arm64/kvm/hyp/pgtable.c |  5 +
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/kvm_pgtable.h 
b/arch/arm64/include/asm/kvm_pgtable.h
index 2c090b0eee77..ff9d52f8073a 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -121,6 +121,10 @@ enum kvm_pgtable_stage2_flags {
 * @KVM_PGTABLE_PROT_W:    Write permission.
 * @KVM_PGTABLE_PROT_R:    Read permission.
  * @KVM_PGTABLE_PROT_DEVICE:   Device attributes.
+ * @KVM_PGTABLE_PROT_SW0:  Software bit 0.
+ * @KVM_PGTABLE_PROT_SW1:  Software bit 1.
+ * @KVM_PGTABLE_PROT_SW2:  Software bit 2.
+ * @KVM_PGTABLE_PROT_SW3:  Software bit 3.
  */
 enum kvm_pgtable_prot {
KVM_PGTABLE_PROT_X  = BIT(0),
@@ -128,6 +132,11 @@ enum kvm_pgtable_prot {
KVM_PGTABLE_PROT_R  = BIT(2),
 
KVM_PGTABLE_PROT_DEVICE = BIT(3),
+
+   KVM_PGTABLE_PROT_SW0= BIT(55),
+   KVM_PGTABLE_PROT_SW1= BIT(56),
+   KVM_PGTABLE_PROT_SW2= BIT(57),
+   KVM_PGTABLE_PROT_SW3= BIT(58),
 };
 
 #define KVM_PGTABLE_PROT_RW (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W)
@@ -420,7 +429,8 @@ kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, 
u64 addr);
  * If there is a valid, leaf page-table entry used to translate @addr, then
  * relax the permissions in that entry according to the read, write and
  * execute permissions specified by @prot. No permissions are removed, and
- * TLB invalidation is performed after updating the entry.
+ * TLB invalidation is performed after updating the entry. Software bits cannot
+ * be set or cleared using kvm_pgtable_stage2_relax_perms().
  *
  * Return: 0 on success, negative error code on failure.
  */
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index e25d829587b9..cff744136044 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -357,6 +357,7 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, 
kvm_pte_t *ptep)
attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap);
attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF;
+   attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
*ptep = attr;
 
return 0;
@@ -558,6 +559,7 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, 
enum kvm_pgtable_prot p
 
attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
+   attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
*ptep = attr;
 
return 0;
@@ -1025,6 +1027,9 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable 
*pgt, u64 addr,
u32 level;
kvm_pte_t set = 0, clr = 0;
 
+   if (prot & KVM_PTE_LEAF_ATTR_HI_SW)
+   return -EINVAL;
+
if (prot & KVM_PGTABLE_PROT_R)
set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;
 
-- 
2.32.0.605.g8dce9f2422-goog

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH v4 12/21] KVM: arm64: Add helpers to tag shared pages in SW bits

2021-08-09 Thread Quentin Perret
We will soon start annotating shared pages in page-tables in nVHE
protected mode. Define all the states in which a page can be (owned,
shared and owned, shared and borrowed), and provide helpers to convert
these states to and from SW-bit annotations using the matching prot
attributes.
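
For illustration (hypothetical call site, not part of this patch),
marking a hypervisor mapping as "shared and owned" then boils down to
encoding the state in the prot value before creating the mapping:

    /* Sketch: encode the ownership state in the SW bits of the prot
     * value used for the hyp mapping. */
    enum kvm_pgtable_prot prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_OWNED);

    ret = pkvm_create_mappings(from, to, prot);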

Reviewed-by: Fuad Tabba 
Signed-off-by: Quentin Perret 
---
 arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 26 +++
 1 file changed, 26 insertions(+)

diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h 
b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index 9c227d87c36d..87b1690c439f 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -12,6 +12,32 @@
 #include 
 #include 
 
+/*
+ * SW bits 0-1 are reserved to track the memory ownership state of each page:
+ *   00: The page is owned exclusively by the page-table owner.
+ *   01: The page is owned by the page-table owner, but is shared
+ *   with another entity.
+ *   10: The page is shared with, but not owned by the page-table owner.
+ *   11: Reserved for future use (lending).
+ */
+enum pkvm_page_state {
+   PKVM_PAGE_OWNED = 0ULL,
+   PKVM_PAGE_SHARED_OWNED  = KVM_PGTABLE_PROT_SW0,
+   PKVM_PAGE_SHARED_BORROWED   = KVM_PGTABLE_PROT_SW1,
+};
+
+#define PKVM_PAGE_STATE_PROT_MASK  (KVM_PGTABLE_PROT_SW0 | 
KVM_PGTABLE_PROT_SW1)
+static inline enum kvm_pgtable_prot pkvm_mkstate(enum kvm_pgtable_prot prot,
+enum pkvm_page_state state)
+{
+   return (prot & ~PKVM_PAGE_STATE_PROT_MASK) | state;
+}
+
+static inline enum pkvm_page_state pkvm_getstate(enum kvm_pgtable_prot prot)
+{
+   return prot & PKVM_PAGE_STATE_PROT_MASK;
+}
+
 struct host_kvm {
struct kvm_arch arch;
struct kvm_pgtable pgt;
-- 
2.32.0.605.g8dce9f2422-goog

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH v4 02/21] KVM: arm64: Introduce hyp_assert_lock_held()

2021-08-09 Thread Quentin Perret
Introduce a poor man's lockdep implementation at EL2 which BUG()s
whenever a hyp spinlock is not held when it should be. Hide this
feature behind a new Kconfig option that targets the EL2 object
specifically, instead of piggybacking on the existing CONFIG_LOCKDEP.
EL2 cannot WARN() cleanly to report locking issues, hence BUG() is the
only option, and it is not clear whether we want this widely enabled.
This is most likely going to be useful for local testing until the EL2
WARN() situation has improved.
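
As a usage sketch (the helper name below is made up; host_kvm.lock is
just an example of a hyp spinlock), EL2 code that relies on a lock
being held by its caller can now document and enforce that:

    static void host_stage2_example_helper(void)
    {
        /* Only checks anything with CONFIG_NVHE_EL2_DEBUG=y, and only
         * once protected mode is fully initialized (see below). */
        hyp_assert_lock_held(&host_kvm.lock);

        /* ... manipulate state protected by host_kvm.lock ... */
    }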

Signed-off-by: Quentin Perret 
---
 arch/arm64/kvm/Kconfig |  9 +
 arch/arm64/kvm/hyp/include/nvhe/spinlock.h | 17 +
 2 files changed, 26 insertions(+)

diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index a4eba0908bfa..9b9721895e5c 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -46,6 +46,15 @@ if KVM
 
 source "virt/kvm/Kconfig"
 
+config NVHE_EL2_DEBUG
+   bool "Debug mode for non-VHE EL2 object"
+   help
+ Say Y here to enable the debug mode for the non-VHE KVM EL2 object.
+ Failure reports will BUG() in the hypervisor. This is intended for
+ local EL2 hypervisor development.
+
+ If unsure, say N.
+
 endif # KVM
 
 endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/hyp/include/nvhe/spinlock.h 
b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h
index 04f65b655fcf..4652fd04bdbe 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/spinlock.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h
@@ -97,4 +97,21 @@ static inline bool hyp_spin_is_locked(hyp_spinlock_t *lock)
return lockval.owner != lockval.next;
 }
 
+#ifdef CONFIG_NVHE_EL2_DEBUG
+static inline void hyp_assert_lock_held(hyp_spinlock_t *lock)
+{
+   /*
+* The __pkvm_init() path accesses protected data-structures without
+* holding locks as the other CPUs are guaranteed to not enter EL2
+* concurrently at this point in time. The point by which EL2 is
+* initialized on all CPUs is reflected in the pkvm static key, so
+* wait until it is set before checking the lock state.
+*/
+   if (static_branch_likely(&kvm_protected_mode_initialized))
+   BUG_ON(!hyp_spin_is_locked(lock));
+}
+#else
+static inline void hyp_assert_lock_held(hyp_spinlock_t *lock) { }
+#endif
+
 #endif /* __ARM64_KVM_NVHE_SPINLOCK_H__ */
-- 
2.32.0.605.g8dce9f2422-goog

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH v4 00/21] Track shared pages at EL2 in protected mode

2021-08-09 Thread Quentin Perret
Hi all,

This is v4 of the patch series previously posted here:

https://lore.kernel.org/lkml/20210729132818.4091769-1-qper...@google.com/

This series aims to improve how the nVHE hypervisor tracks ownership of
memory pages when running in protected mode ("kvm-arm.mode=protected" on
the kernel command line).

The main issue with the existing ownership tracking code is that it is
completely binary: a page is either owned by an entity (e.g. the host)
or not. However, we'll need something smarter to track shared pages, as
is needed for virtio, or even just host/hypervisor communications.

This series introduces a few changes to the kvm page-table library to
allow annotating shared pages in ignored bits (a.k.a. software bits) of
leaf entries, and makes use of that infrastructure to track all pages
that are shared between the host and the hypervisor. We will obviously
want to apply the same treatment to guest stage-2 page-tables, but that
is not really possible to do until EL2 manages them directly, so I'll
keep that for another series.

The series is based on the 5.14-rc5, and has been tested on
AML-S905X-CC (Le Potato) and using various Qemu configurations.

Changes since v3
 - Fixed typos in comments / commit messages;
 - Various small cleanups and refactoring;
 - Rebased on 5.14-rc5.

Marc Zyngier (1):
  KVM: arm64: Introduce helper to retrieve a PTE and its level

Quentin Perret (19):
  KVM: arm64: Introduce hyp_assert_lock_held()
  KVM: arm64: Provide the host_stage2_try() helper macro
  KVM: arm64: Expose page-table helpers
  KVM: arm64: Optimize host memory aborts
  KVM: arm64: Rename KVM_PTE_LEAF_ATTR_S2_IGNORED
  KVM: arm64: Don't overwrite software bits with owner id
  KVM: arm64: Tolerate re-creating hyp mappings to set software bits
  KVM: arm64: Enable forcing page-level stage-2 mappings
  KVM: arm64: Allow populating software bits
  KVM: arm64: Add helpers to tag shared pages in SW bits
  KVM: arm64: Expose host stage-2 manipulation helpers
  KVM: arm64: Expose pkvm_hyp_id
  KVM: arm64: Introduce addr_is_memory()
  KVM: arm64: Enable retrieving protections attributes of PTEs
  KVM: arm64: Mark host bss and rodata section as shared
  KVM: arm64: Remove __pkvm_mark_hyp
  KVM: arm64: Refactor protected nVHE stage-1 locking
  KVM: arm64: Restrict EL2 stage-1 changes in protected mode
  KVM: arm64: Make __pkvm_create_mappings static

Will Deacon (1):
  KVM: arm64: Add hyp_spin_is_locked() for basic locking assertions at
EL2

 arch/arm64/include/asm/kvm_asm.h  |   5 +-
 arch/arm64/include/asm/kvm_pgtable.h  | 167 
 arch/arm64/kvm/Kconfig|   9 +
 arch/arm64/kvm/arm.c  |  46 
 arch/arm64/kvm/hyp/include/nvhe/mem_protect.h |  33 ++-
 arch/arm64/kvm/hyp/include/nvhe/mm.h  |   3 +-
 arch/arm64/kvm/hyp/include/nvhe/spinlock.h|  25 ++
 arch/arm64/kvm/hyp/nvhe/hyp-main.c|  20 +-
 arch/arm64/kvm/hyp/nvhe/mem_protect.c | 225 +---
 arch/arm64/kvm/hyp/nvhe/mm.c  |  22 +-
 arch/arm64/kvm/hyp/nvhe/setup.c   |  82 +-
 arch/arm64/kvm/hyp/pgtable.c  | 247 +-
 arch/arm64/kvm/mmu.c  |  28 +-
 13 files changed, 628 insertions(+), 284 deletions(-)

-- 
2.32.0.605.g8dce9f2422-goog

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH v4 07/21] KVM: arm64: Rename KVM_PTE_LEAF_ATTR_S2_IGNORED

2021-08-09 Thread Quentin Perret
The ignored bits for both stage-1 and stage-2 page and block
descriptors are in [55:58], so rename KVM_PTE_LEAF_ATTR_S2_IGNORED to
make it applicable to both. And while at it, since these bits are more
commonly known as 'software' bits, rename accordingly.

Reviewed-by: Fuad Tabba 
Signed-off-by: Quentin Perret 
---
 arch/arm64/kvm/hyp/pgtable.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 4dff2ad39ee4..59a394d82de3 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -36,6 +36,8 @@
 
 #define KVM_PTE_LEAF_ATTR_HI   GENMASK(63, 51)
 
+#define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55)
+
 #define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54)
 
 #define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54)
@@ -44,8 +46,6 @@
 KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
 KVM_PTE_LEAF_ATTR_HI_S2_XN)
 
-#define KVM_PTE_LEAF_ATTR_S2_IGNORED   GENMASK(58, 55)
-
 #define KVM_INVALID_PTE_OWNER_MASK GENMASK(63, 56)
 #define KVM_MAX_OWNER_ID   1
 
-- 
2.32.0.605.g8dce9f2422-goog

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH v4 06/21] KVM: arm64: Optimize host memory aborts

2021-08-09 Thread Quentin Perret
The kvm_pgtable_stage2_find_range() function is used in the host memory
abort path to try and look for the largest block mapping that can be
used to map the faulting address. In order to do so, the function
currently walks the stage-2 page-table and looks for existing
incompatible mappings within the range of the largest possible block.
If incompatible mappings are found, it tries the same procedure again,
but using a smaller block range, and repeats until a matching range is
found (potentially up to page granularity). While this approach has
benefits (mostly in that it proactively coalesces host stage-2
mappings), it can be slow if the ranges are fragmented, and it isn't
optimized to deal with CPUs faulting on the same IPA, as all of them
will do all the work every time.

To avoid these issues, remove kvm_pgtable_stage2_find_range(), and walk
the page-table only once in the host_mem_abort() path to find the
closest leaf to the input address. With this, use the corresponding
range if it is invalid and not owned by another entity. If a valid leaf
is found, return -EAGAIN similar to what is done in the
kvm_pgtable_stage2_map() path to optimize concurrent faults.
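
In simplified form, the new logic described above (implemented as
host_stage2_adjust_range() in the diff below) amounts to:

    /* Simplified sketch; see the diff for the complete version with
     * locking and the range-shrinking loop. */
    ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, &level);
    if (ret)
        return ret;
    if (kvm_pte_valid(pte))
        return -EAGAIN;    /* valid leaf: another CPU mapped it already */
    if (pte)
        return -EPERM;     /* invalid but annotated: owned by another entity */
    /* otherwise, shrink the candidate range to the invalid block at
     * 'level' containing addr, and use that for the mapping */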

Reviewed-by: Fuad Tabba 
Signed-off-by: Quentin Perret 
---
 arch/arm64/include/asm/kvm_pgtable.h  | 30 ---
 arch/arm64/kvm/hyp/nvhe/mem_protect.c | 45 +++-
 arch/arm64/kvm/hyp/pgtable.c  | 74 ---
 3 files changed, 44 insertions(+), 105 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_pgtable.h 
b/arch/arm64/include/asm/kvm_pgtable.h
index 6938eac72c1f..83c5c97d9eac 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -154,16 +154,6 @@ enum kvm_pgtable_prot {
 #define PAGE_HYP_RO (KVM_PGTABLE_PROT_R)
 #define PAGE_HYP_DEVICE (PAGE_HYP | KVM_PGTABLE_PROT_DEVICE)
 
-/**
- * struct kvm_mem_range - Range of Intermediate Physical Addresses
- * @start: Start of the range.
- * @end:   End of the range.
- */
-struct kvm_mem_range {
-   u64 start;
-   u64 end;
-};
-
 /**
  * enum kvm_pgtable_walk_flags - Flags to control a depth-first page-table 
walk.
  * @KVM_PGTABLE_WALK_LEAF: Visit leaf entries, including invalid
@@ -490,24 +480,4 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, 
u64 size,
  */
 int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
 kvm_pte_t *ptep, u32 *level);
-
-/**
- * kvm_pgtable_stage2_find_range() - Find a range of Intermediate Physical
- *  Addresses with compatible permission
- *  attributes.
- * @pgt:   Page-table structure initialised by kvm_pgtable_stage2_init*().
- * @addr:  Address that must be covered by the range.
- * @prot:  Protection attributes that the range must be compatible with.
- * @range: Range structure used to limit the search space at call time and
- * that will hold the result.
- *
- * The offset of @addr within a page is ignored. An IPA is compatible with 
@prot
- * iff its corresponding stage-2 page-table entry has default ownership and, if
- * valid, is mapped with protection attributes identical to @prot.
- *
- * Return: 0 on success, negative error code on failure.
- */
-int kvm_pgtable_stage2_find_range(struct kvm_pgtable *pgt, u64 addr,
- enum kvm_pgtable_prot prot,
- struct kvm_mem_range *range);
 #endif /* __ARM64_KVM_PGTABLE_H__ */
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c 
b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 74280a753efb..2148d3968aa5 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -159,6 +159,11 @@ static int host_stage2_unmap_dev_all(void)
return kvm_pgtable_stage2_unmap(pgt, addr, BIT(pgt->ia_bits) - addr);
 }
 
+struct kvm_mem_range {
+   u64 start;
+   u64 end;
+};
+
 static bool find_mem_range(phys_addr_t addr, struct kvm_mem_range *range)
 {
int cur, left = 0, right = hyp_memblock_nr;
@@ -227,6 +232,44 @@ static inline int __host_stage2_idmap(u64 start, u64 end,
__ret;  \
 })
 
+static inline bool range_included(struct kvm_mem_range *child,
+ struct kvm_mem_range *parent)
+{
+   return parent->start <= child->start && child->end <= parent->end;
+}
+
+static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
+{
+   struct kvm_mem_range cur;
+   kvm_pte_t pte;
+   u32 level;
+   int ret;
+
+   hyp_assert_lock_held(&host_kvm.lock);
+   ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, &level);
+   if (ret)
+   return ret;
+
+   if (kvm_pte_valid(pte))
+   return -EAGAIN;
+
+   if (pte)
+   return -EPERM;
+
+   do {
+   u64 granule = kvm_granule_size(level);

Re: [PATCH v3] memblock: make memblock_find_in_range method private

2021-08-09 Thread Guenter Roeck
On Tue, Aug 03, 2021 at 09:42:18AM +0300, Mike Rapoport wrote:
> From: Mike Rapoport 
> 
> There are a lot of uses of memblock_find_in_range() along with
> memblock_reserve() from the times memblock allocation APIs did not exist.
> 
> memblock_find_in_range() is the very core of memblock allocations, so any
> future changes to its internal behaviour would mandate updates of all the
> users outside memblock.
> 
> Replace the calls to memblock_find_in_range() with an equivalent calls to
> memblock_phys_alloc() and memblock_phys_alloc_range() and make
> memblock_find_in_range() private method of memblock.
> 
> This simplifies the callers, ensures that (unlikely) errors in
> memblock_reserve() are handled and improves maintainability of
> memblock_find_in_range().
> 
> Signed-off-by: Mike Rapoport 
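
For reference, the conversion described in the quoted commit message is
roughly the following (illustrative only; the exact range, alignment and
error handling depend on the call site):

    /* Before: find and reserve as two separate steps. */
    addr = memblock_find_in_range(start, end, size, SMP_CACHE_BYTES);
    if (!addr)
        panic("Failed to allocate memory\n");
    memblock_reserve(addr, size);

    /* After: a single allocation call, so a failed reservation can no
     * longer go unnoticed. */
    addr = memblock_phys_alloc_range(size, SMP_CACHE_BYTES, start, end);
    if (!addr)
        panic("Failed to allocate memory\n");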

I see a number of crashes in next-20210806 when booting x86 images from efi.

[0.00] efi: EFI v2.70 by EDK II
[0.00] efi: SMBIOS=0x1fbcc000 ACPI=0x1fbfa000 ACPI 2.0=0x1fbfa014 
MEMATTR=0x1f25f018
[0.00] SMBIOS 2.8 present.
[0.00] DMI: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
[0.00] last_pfn = 0x1ff50 max_arch_pfn = 0x4
[0.00] x86/PAT: Configuration [0-7]: WB  WC  UC- UC  WB  WP  UC- WT
[0.00] Kernel panic - not syncing: alloc_low_pages: can not alloc memory
[0.00] CPU: 0 PID: 0 Comm: swapper Not tainted 5.14.0-rc4-next-20210806 
#1
[0.00] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 
02/06/2015
[0.00] Call Trace:
[0.00]  ? dump_stack_lvl+0x57/0x7d
[0.00]  ? panic+0xfc/0x2c6
[0.00]  ? alloc_low_pages+0x117/0x156
[0.00]  ? phys_pmd_init+0x234/0x342
[0.00]  ? phys_pud_init+0x171/0x337
[0.00]  ? __kernel_physical_mapping_init+0xec/0x276
[0.00]  ? init_memory_mapping+0x1ea/0x2aa
[0.00]  ? init_range_memory_mapping+0xdf/0x12e
[0.00]  ? init_mem_mapping+0x1e9/0x26f
[0.00]  ? setup_arch+0x5ff/0xb6d
[0.00]  ? start_kernel+0x71/0x6b4
[0.00]  ? secondary_startup_64_no_verify+0xc2/0xcb

Bisect points to this patch. Reverting it fixes the problem. The key seems
to be the amount of memory configured in qemu; the problem is not seen if
there is 1G or more of memory, but it is seen with all test boots with
512M or 256M of memory. It is also seen with almost all 32-bit efi boots.

The problem is not seen when booting without efi.

Guenter

---
Bisect log:

# bad: [da454ebf578f6c542ba9f5b3ddb98db3ede109c1] Add linux-next specific files 
for 20210809
# good: [36a21d51725af2ce0700c6ebcb6b9594aac658a6] Linux 5.14-rc5
git bisect start 'HEAD' 'v5.14-rc5'
# good: [d22fda64bea5f33000e31e5b7e4ba876bca37436] Merge remote-tracking branch 
'crypto/master'
git bisect good d22fda64bea5f33000e31e5b7e4ba876bca37436
# good: [b084da3a98fad27a39ed5ca64106b86df0417851] Merge remote-tracking branch 
'irqchip/irq/irqchip-next'
git bisect good b084da3a98fad27a39ed5ca64106b86df0417851
# good: [a5383d1f57190a33c6afc25c62b9907d84ba2bc6] Merge remote-tracking branch 
'staging/staging-next'
git bisect good a5383d1f57190a33c6afc25c62b9907d84ba2bc6
# good: [a439da3e6abeb054f4e6b0d37814e762b7340196] Merge remote-tracking branch 
'seccomp/for-next/seccomp'
git bisect good a439da3e6abeb054f4e6b0d37814e762b7340196
# bad: [9801f3c0890c7b992b45a5c2afcb16c5cdc8388e] mm/idle_page_tracking: Make 
PG_idle reusable
git bisect bad 9801f3c0890c7b992b45a5c2afcb16c5cdc8388e
# good: [b4f7f4a9b542836683308d48ffdd18471c6f3e76] 
lazy-tlb-allow-lazy-tlb-mm-refcounting-to-be-configurable-fix
git bisect good b4f7f4a9b542836683308d48ffdd18471c6f3e76
# good: [e30842a48c36f094271eea0984bb861b49c49c87] mm/vmscan: add 'else' to 
remove check_pending label
git bisect good e30842a48c36f094271eea0984bb861b49c49c87
# bad: [65300b20a21214fb2043419d4e5da1d9947c6e15] mm/madvise: add MADV_WILLNEED 
to process_madvise()
git bisect bad 65300b20a21214fb2043419d4e5da1d9947c6e15
# bad: [7348da7a8c244d1a755bc5838b04cb9b1b6ee06c] memblock: make 
memblock_find_in_range method private
git bisect bad 7348da7a8c244d1a755bc5838b04cb9b1b6ee06c
# good: [98f8c467fe2ba8e553b450b2a3294d69f1f2027f] 
mm-mempolicy-convert-from-atomic_t-to-refcount_t-on-mempolicy-refcnt-fix
git bisect good 98f8c467fe2ba8e553b450b2a3294d69f1f2027f
# good: [760ded422ebe4f8899905b752d8378c44f2a78f3] mm/memplicy: add page 
allocation function for MPOL_PREFERRED_MANY policy
git bisect good 760ded422ebe4f8899905b752d8378c44f2a78f3
# good: [fbfa0492d9639b67119d3d94b7a6a3f85e064260] mm/mempolicy: advertise new 
MPOL_PREFERRED_MANY
git bisect good fbfa0492d9639b67119d3d94b7a6a3f85e064260
# good: [ff6d5759a871883aeea38309fb16d91666179328] mm/mempolicy: unify the 
create() func for bind/interleave/prefer-many policies
git bisect good ff6d5759a871883aeea38309fb16d91666179328
# first bad commit: [7348da7a8c244d1a755bc5838b04cb9b1b6ee06c]

RE: [PATCH v3 4/4] KVM: arm64: Clear active_vmids on vCPU schedule out

2021-08-09 Thread Shameerali Kolothum Thodi



> -Original Message-
> From: Will Deacon [mailto:w...@kernel.org]
> Sent: 09 August 2021 14:09
> To: Shameerali Kolothum Thodi 
> Cc: linux-arm-ker...@lists.infradead.org; kvmarm@lists.cs.columbia.edu;
> linux-ker...@vger.kernel.org; m...@kernel.org; catalin.mari...@arm.com;
> james.mo...@arm.com; julien.thierry.k...@gmail.com;
> suzuki.poul...@arm.com; jean-phili...@linaro.org;
> alexandru.eli...@arm.com; qper...@google.com; Linuxarm
> 
> Subject: Re: [PATCH v3 4/4] KVM: arm64: Clear active_vmids on vCPU
> schedule out
> 
> On Fri, Aug 06, 2021 at 12:24:36PM +, Shameerali Kolothum Thodi
> wrote:
> > These are some test numbers with and without this patch, run on two
> > different test setups.
> >
> >
> > a)Test Setup -1
> > ---
> >
> > Platform: HiSilicon D06 with 128 CPUs, VMID bits = 16
> > Run 128 VMs concurrently each with 2 vCPUs. Each Guest will execute
> hackbench
> > 5 times before exiting.
> >
> > Measurements taken avg. of 10 Runs.
> >
> > Image : 5.14-rc3
> > ---
> >   Time(s)   44.43813888
> >   No. of exits145,348,264
> >
> > Image: 5.14-rc3 + vmid-v3
> > 
> >   Time(s)46.59789034
> >   No. of exits 133,587,307
> >
> > %diff against 5.14-rc3
> >   Time: 4.8% more
> >   Exits: 8% less
> >
> > Image: 5.14-rc3 + vmid-v3 + Without active_asid clear
> > ---
> >   Time(s) 44.5031782
> >   No. of exits  144,443,188
> >
> > %diff against 5.14-rc3
> >   Time: 0.15% more
> >   Exits: 2.42% less
> >
> > b)Test Setup -2
> > ---
> >
> > Platform: HiSilicon D06 + Kernel with maxcpus set to 8 and VMID bits set to
> 4.
> > Run 40 VMs concurrently each with 2 vCPUs. Each Guest will execute
> hackbench
> > 5 times before exiting.
> >
> > Measurements taken avg. of 10 Runs.
> >
> > Image : 5.14-rc3-vmid4bit
> > 
> >   Time(s)46.19963266
> >   No. of exits 23,699,546
> >
> > Image: 5.14-rc3-vmid4bit + vmid-v3
> > ---
> >   Time(s)  45.83307736
> >   No. of exits  23,260,203
> >
> > %diff against 5.14-rc3-vmid4bit
> >   Time: 0.8% less
> >   Exits: 1.85% less
> >
> > Image: 5.14-rc3-vmid4bit + vmid-v3 + Without active_asid clear
> > -
> >   Time(s)   44.5031782
> >   No. of exits144,443,188
> 
> Really? The *exact* same numbers as the "Image: 5.14-rc3 + vmid-v3 +
> Without
> active_asid clear" configuration? Guessing a copy-paste error here.
> 
> > %diff against 5.14-rc3-vmid4bit
> >   Time: 1.05% less
> >   Exits: 2.06% less
> >
> > As expected, the active_asid clear on schedule out is not helping.
> > But without this patch, the numbers seems to be better than the
> > vanilla kernel when we force the setup(cpus=8, vmd=4bits)
> > to perform rollover.
> 
> I'm struggling a bit to understand these numbers. Are you saying that
> clearing the active_asid helps in the 16-bit VMID case but not in the
> 4-bit case?

Nope, the other way around. The point I was trying to make is that
clearing the active_vmids definitely has an impact in the 16-bit vmid
case, where rollover is not happening, as it ends up taking the slow
path more frequently.

Test setup-1, case 2 (with active_vmids clear): around 4.8% more time
to finish the test compared to the vanilla kernel.

Test setup-1, case 3 (without clear): 0.15% more time compared to the
vanilla kernel.

For the 4-bit vmid case, the impact of clearing vmids is not that obvious,
probably because we have more rollovers.

Test setup-2, case 2 (with active_vmids clear): 0.8% less time compared to
the vanilla kernel.
Test setup-2, case 3 (without clear): 1.05% less time compared to the
vanilla kernel.

So between the two (with and without clearing the active_vmids), the "without"
one has better numbers for both test setups.

> Why would the active_asid clear have any impact on the number of exits?

In the 16-bit vmid case, it looks like the number of exits is considerably
lower if we clear active_vmids. Not sure whether that is because of the more
frequent slow path or not. But anyway, the time to finish the test is higher.
 
> The problem I see with not having the active_asid clear is that we will
> roll over more frequently as the number of reserved VMIDs increases.

Ok. The idea of running the 4-bit test setup was to capture that. It doesn't
look like it has a major impact when compared to the original kernel. Maybe
I should take an average of more test runs. Please let me know if there is a
better way to measure that impact.

Hope, I am clear.

Thanks,
Shameer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 4/4] KVM: arm64: Clear active_vmids on vCPU schedule out

2021-08-09 Thread Will Deacon
On Fri, Aug 06, 2021 at 12:24:36PM +, Shameerali Kolothum Thodi wrote:
> These are some test numbers with and without this patch, run on two
> different test setups.
> 
> 
> a)Test Setup -1
> ---
> 
> Platform: HiSilicon D06 with 128 CPUs, VMID bits = 16
> Run 128 VMs concurrently each with 2 vCPUs. Each Guest will execute hackbench
> 5 times before exiting.
> 
> Measurements taken avg. of 10 Runs.
> 
> Image : 5.14-rc3
> ---
>   Time(s)   44.43813888
>   No. of exits145,348,264
> 
> Image: 5.14-rc3 + vmid-v3
> 
>   Time(s)46.59789034
>   No. of exits 133,587,307
> 
> %diff against 5.14-rc3
>   Time: 4.8% more
>   Exits: 8% less 
> 
> Image: 5.14-rc3 + vmid-v3 + Without active_asid clear
> ---
>   Time(s) 44.5031782
>   No. of exits  144,443,188
> 
> %diff against 5.14-rc3
>   Time: 0.15% more
>   Exits: 2.42% less
> 
> b)Test Setup -2
> ---
> 
> Platform: HiSilicon D06 + Kernel with maxcpus set to 8 and VMID bits set to 4.
> Run 40 VMs concurrently each with 2 vCPUs. Each Guest will execute hackbench
> 5 times before exiting.
> 
> Measurements taken avg. of 10 Runs.
> 
> Image : 5.14-rc3-vmid4bit
> 
>   Time(s)46.19963266
>   No. of exits 23,699,546
> 
> Image: 5.14-rc3-vmid4bit + vmid-v3
> ---
>   Time(s)  45.83307736
>   No. of exits  23,260,203
> 
> %diff against 5.14-rc3-vmid4bit
>   Time: 0.8% less
>   Exits: 1.85% less 
> 
> Image: 5.14-rc3-vmid4bit + vmid-v3 + Without active_asid clear
> -
>   Time(s)   44.5031782
>   No. of exits144,443,188

Really? The *exact* same numbers as the "Image: 5.14-rc3 + vmid-v3 + Without
active_asid clear" configuration? Guessing a copy-paste error here.

> %diff against 5.14-rc3-vmid4bit
>   Time: 1.05% less
>   Exits: 2.06% less
> 
> As expected, the active_asid clear on schedule out is not helping.
> But without this patch, the numbers seems to be better than the
> vanilla kernel when we force the setup(cpus=8, vmd=4bits)
> to perform rollover.

I'm struggling a bit to understand these numbers. Are you saying that
clearing the active_asid helps in the 16-bit VMID case but not in the
4-bit case?

Why would the active_asid clear have any impact on the number of exits?

The problem I see with not having the active_asid clear is that we will
roll over more frequently as the number of reserved VMIDs increases.

Will
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm