Re: [PATCH v2] memblock: make memblock_find_in_range method private

2021-08-06 Thread Russell King (Oracle)
On Mon, Aug 02, 2021 at 09:37:37AM +0300, Mike Rapoport wrote:
> From: Mike Rapoport 
> 
> There are a lot of uses of memblock_find_in_range() along with
> memblock_reserve() from the times when the memblock allocation APIs did
> not exist.
> 
> memblock_find_in_range() is the very core of memblock allocations, so any
> future changes to its internal behaviour would mandate updates of all the
> users outside memblock.
> 
> Replace the calls to memblock_find_in_range() with equivalent calls to
> memblock_phys_alloc() and memblock_phys_alloc_range() and make
> memblock_find_in_range() a private method of memblock.
> 
> This simplifies the callers, ensures that (unlikely) errors in
> memblock_reserve() are handled and improves maintainability of
> memblock_find_in_range().
> 
> Signed-off-by: Mike Rapoport 
Acked-by: Russell King (Oracle) 

Thanks.

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!
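
To illustrate the conversion described in the patch above, a hypothetical
call site (not one of the actual hunks; min_addr/max_addr/size/align are
placeholders) would change roughly like this:

	phys_addr_t base;

	/* Before: find a free range, then reserve it in a second step
	 * whose return code was frequently ignored. */
	base = memblock_find_in_range(min_addr, max_addr, size, align);
	if (!base)
		return -ENOMEM;
	memblock_reserve(base, size);

	/* After: one call that both finds and reserves the range, so the
	 * (unlikely) reservation failure is handled inside memblock. */
	base = memblock_phys_alloc_range(size, align, min_addr, max_addr);
	if (!base)
		return -ENOMEM;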


Re: [PATCH 4/4] KVM: arm64: Upgrade VMID accesses to {READ,WRITE}_ONCE

2021-08-06 Thread Quentin Perret
On Friday 06 Aug 2021 at 12:31:08 (+0100), Will Deacon wrote:
> From: Marc Zyngier 
> 
> Since TLB invalidation can run in parallel with VMID allocation,
> we need to be careful and avoid any sort of load/store tearing.
> Use {READ,WRITE}_ONCE consistently to avoid any surprise.
> 
> Cc: Catalin Marinas 
> Cc: Jade Alglave 
> Cc: Shameer Kolothum 
> Signed-off-by: Marc Zyngier 
> Signed-off-by: Will Deacon 
> ---
>  arch/arm64/include/asm/kvm_mmu.h  | 7 ++-
>  arch/arm64/kvm/arm.c  | 2 +-
>  arch/arm64/kvm/hyp/nvhe/mem_protect.c | 4 ++--
>  arch/arm64/kvm/mmu.c  | 2 +-
>  4 files changed, 10 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_mmu.h 
> b/arch/arm64/include/asm/kvm_mmu.h
> index 934ef0deff9f..5828dd8fa738 100644
> --- a/arch/arm64/include/asm/kvm_mmu.h
> +++ b/arch/arm64/include/asm/kvm_mmu.h
> @@ -252,6 +252,11 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, 
> gpa_t gpa,
>  
>  #define kvm_phys_to_vttbr(addr)  phys_to_ttbr(addr)
>  
> +/*
> + * When this is (directly or indirectly) used on the TLB invalidation
> + * path, we rely on a previously issued DSB so that page table updates
> + * and VMID reads are correctly ordered.
> + */
>  static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu)
>  {
>   struct kvm_vmid *vmid = &mmu->vmid;
> @@ -259,7 +264,7 @@ static __always_inline u64 kvm_get_vttbr(struct 
> kvm_s2_mmu *mmu)
>   u64 cnp = system_supports_cnp() ? VTTBR_CNP_BIT : 0;
>  
>   baddr = mmu->pgd_phys;
> - vmid_field = (u64)vmid->vmid << VTTBR_VMID_SHIFT;
> + vmid_field = (u64)READ_ONCE(vmid->vmid) << VTTBR_VMID_SHIFT;
>   return kvm_phys_to_vttbr(baddr) | vmid_field | cnp;
>  }
>  
> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> index e9a2b8f27792..658f76067f46 100644
> --- a/arch/arm64/kvm/arm.c
> +++ b/arch/arm64/kvm/arm.c
> @@ -571,7 +571,7 @@ static void update_vmid(struct kvm_vmid *vmid)
>   kvm_call_hyp(__kvm_flush_vm_context);
>   }
>  
> - vmid->vmid = kvm_next_vmid;
> + WRITE_ONCE(vmid->vmid, kvm_next_vmid);
>   kvm_next_vmid++;
>   kvm_next_vmid &= (1 << kvm_get_vmid_bits()) - 1;
>  
> diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c 
> b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> index d4e74ca7f876..55ae97a144b8 100644
> --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> @@ -109,8 +109,8 @@ int kvm_host_prepare_stage2(void *pgt_pool_base)
>   mmu->pgd_phys = __hyp_pa(host_kvm.pgt.pgd);
>   mmu->arch = &host_kvm.arch;
>   mmu->pgt = &host_kvm.pgt;
> - mmu->vmid.vmid_gen = 0;
> - mmu->vmid.vmid = 0;
> + WRITE_ONCE(mmu->vmid.vmid_gen, 0);
> + WRITE_ONCE(mmu->vmid.vmid, 0);

I'm guessing it should be safe to omit those? But they certainly do no
harm and can serve as documentation anyway, so:

Reviewed-by: Quentin Perret 

Thanks,
Quentin

>  
>   return 0;
>  }
> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> index 3155c9e778f0..b1a6eaec28ff 100644
> --- a/arch/arm64/kvm/mmu.c
> +++ b/arch/arm64/kvm/mmu.c
> @@ -485,7 +485,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct 
> kvm_s2_mmu *mmu)
>   mmu->arch = &kvm->arch;
>   mmu->pgt = pgt;
>   mmu->pgd_phys = __pa(pgt->pgd);
> - mmu->vmid.vmid_gen = 0;
> + WRITE_ONCE(mmu->vmid.vmid_gen, 0);
>   return 0;
>  
>  out_destroy_pgtable:
> -- 
> 2.32.0.605.g8dce9f2422-goog
> 
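
For background on the tearing concern addressed above: with plain accesses
the compiler may split, merge or re-load the VMID, so both sides of the
race are annotated explicitly (condensed from the hunks in the patch):

	/* Writer side (update_vmid(), under the VMID lock): a single,
	 * non-torn store that a concurrent reader may observe at any time. */
	WRITE_ONCE(vmid->vmid, kvm_next_vmid);

	/* Reader side (kvm_get_vttbr(), possibly on the TLB invalidation
	 * path): a single, non-torn load, so the VTTBR is built from one
	 * consistent VMID value rather than a mix of old and new bits. */
	vmid_field = (u64)READ_ONCE(vmid->vmid) << VTTBR_VMID_SHIFT;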


Re: [PATCH 3/4] KVM: arm64: Convert the host S2 over to __load_guest_stage2()

2021-08-06 Thread Quentin Perret
On Friday 06 Aug 2021 at 12:31:07 (+0100), Will Deacon wrote:
> From: Marc Zyngier 
> 
> The protected mode relies on a separate helper to load the
> S2 context. Move over to the __load_guest_stage2() helper
> instead.
> 
> Cc: Catalin Marinas 
> Cc: Jade Alglave 
> Cc: Shameer Kolothum 
> Signed-off-by: Marc Zyngier 
> Signed-off-by: Will Deacon 
> ---
>  arch/arm64/include/asm/kvm_mmu.h  | 11 +++
>  arch/arm64/kvm/hyp/include/nvhe/mem_protect.h |  2 +-
>  arch/arm64/kvm/hyp/nvhe/mem_protect.c |  2 +-
>  3 files changed, 5 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_mmu.h 
> b/arch/arm64/include/asm/kvm_mmu.h
> index 05e089653a1a..934ef0deff9f 100644
> --- a/arch/arm64/include/asm/kvm_mmu.h
> +++ b/arch/arm64/include/asm/kvm_mmu.h
> @@ -267,9 +267,10 @@ static __always_inline u64 kvm_get_vttbr(struct 
> kvm_s2_mmu *mmu)
>   * Must be called from hyp code running at EL2 with an updated VTTBR
>   * and interrupts disabled.
>   */
> -static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu, unsigned 
> long vtcr)
> +static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu,
> + struct kvm_arch *arch)
>  {
> - write_sysreg(vtcr, vtcr_el2);
> + write_sysreg(arch->vtcr, vtcr_el2);
>   write_sysreg(kvm_get_vttbr(mmu), vttbr_el2);
>  
>   /*
> @@ -280,12 +281,6 @@ static __always_inline void __load_stage2(struct 
> kvm_s2_mmu *mmu, unsigned long
>   asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
>  }
>  
> -static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu,
> - struct kvm_arch *arch)
> -{
> - __load_stage2(mmu, arch->vtcr);
> -}
> -
>  static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu)
>  {
>   return container_of(mmu->arch, struct kvm, arch);
> diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h 
> b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
> index 9c227d87c36d..a910648bc71b 100644
> --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
> +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
> @@ -29,7 +29,7 @@ void handle_host_mem_abort(struct kvm_cpu_context 
> *host_ctxt);
>  static __always_inline void __load_host_stage2(void)
>  {
>   if (static_branch_likely(&kvm_protected_mode_initialized))
> - __load_stage2(&host_kvm.arch.mmu, host_kvm.arch.vtcr);
> + __load_guest_stage2(&host_kvm.arch.mmu, &host_kvm.arch);
>   else
>   write_sysreg(0, vttbr_el2);
>  }
> diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c 
> b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> index d938ce95d3bd..d4e74ca7f876 100644
> --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> @@ -126,7 +126,7 @@ int __pkvm_prot_finalize(void)
>   kvm_flush_dcache_to_poc(params, sizeof(*params));
>  
>   write_sysreg(params->hcr_el2, hcr_el2);
> - __load_stage2(&host_kvm.arch.mmu, host_kvm.arch.vtcr);
> + __load_guest_stage2(&host_kvm.arch.mmu, &host_kvm.arch);

Nit: clearly we're not loading a guest stage-2 here, so maybe the
function should take a more generic name?

Thanks,
Quentin


Re: (subset) [PATCH 0/4] Fix racing TLBI with ASID/VMID reallocation

2021-08-06 Thread Catalin Marinas
On Fri, 6 Aug 2021 12:31:03 +0100, Will Deacon wrote:
> While reviewing Shameer's reworked VMID allocator [1] and discussing
> with Marc, we spotted a race between TLB invalidation (which typically
> takes an ASID or VMID argument) and reallocation of ASID/VMID for the
> context being targeted.
> 
> The first patch spells out an example with try_to_unmap_one() in a
> comment, which Catalin has kindly modelled in TLA+ at [2].
> 
> [...]

Applied to arm64 (for-next/misc), thanks!

[1/4] arm64: mm: Fix TLBI vs ASID rollover
  https://git.kernel.org/arm64/c/5e10f9887ed8

-- 
Catalin



[PATCH V10 01/18] perf/core: Use static_call to optimize perf_guest_info_callbacks

2021-08-06 Thread Zhu Lingshan
From: Like Xu 

For "struct perf_guest_info_callbacks", the two fields "is_in_guest"
and "is_user_mode" are replaced with a new multiplexed member named
"state", and the "get_guest_ip" field will be renamed to "get_ip".

For arm64, xen and kvm/x86, the application of DEFINE_STATIC_CALL_RET0
could make all that perf_guest_cbs stuff suck less. For arm, csky, nds32,
and riscv, just applied some renamed refactoring.
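
In code, the multiplexing amounts to the following (condensed from the
perf_misc_flags() hunk below; PERF_GUEST_USER is the flag tested there):

	/* Old interface: two separate callbacks. */
	if (perf_guest_cbs->is_in_guest()) {
		if (perf_guest_cbs->is_user_mode())
			misc |= PERF_RECORD_MISC_GUEST_USER;
	}

	/* New interface: one callback returning a bitmask. A non-zero
	 * value means "in guest"; individual bits refine the state. */
	unsigned int state = perf_guest_cbs->state();

	if (state) {
		if (state & PERF_GUEST_USER)
			misc |= PERF_RECORD_MISC_GUEST_USER;
	}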

Cc: Will Deacon 
Cc: Marc Zyngier 
Cc: Guo Ren 
Cc: Nick Hu 
Cc: Paul Walmsley 
Cc: Boris Ostrovsky 
Cc: linux-arm-ker...@lists.infradead.org
Cc: kvmarm@lists.cs.columbia.edu
Cc: linux-c...@vger.kernel.org
Cc: linux-ri...@lists.infradead.org
Cc: xen-de...@lists.xenproject.org
Suggested-by: Peter Zijlstra (Intel) 
Original-by: Peter Zijlstra (Intel) 
Signed-off-by: Like Xu 
Signed-off-by: Zhu Lingshan 
Reviewed-by: Boris Ostrovsky 
Acked-by: Peter Zijlstra (Intel) 
---
 arch/arm/kernel/perf_callchain.c   | 16 +++-
 arch/arm64/kernel/perf_callchain.c | 29 +-
 arch/arm64/kvm/perf.c  | 22 -
 arch/csky/kernel/perf_callchain.c  |  4 +--
 arch/nds32/kernel/perf_event_cpu.c | 16 +++-
 arch/riscv/kernel/perf_callchain.c |  4 +--
 arch/x86/events/core.c | 39 --
 arch/x86/events/intel/core.c   |  7 +++---
 arch/x86/include/asm/kvm_host.h|  2 +-
 arch/x86/kvm/pmu.c |  2 +-
 arch/x86/kvm/x86.c | 37 +++-
 arch/x86/xen/pmu.c | 33 ++---
 include/linux/perf_event.h | 12 ++---
 kernel/events/core.c   |  9 +++
 14 files changed, 144 insertions(+), 88 deletions(-)

diff --git a/arch/arm/kernel/perf_callchain.c b/arch/arm/kernel/perf_callchain.c
index 3b69a76d341e..1ce30f86d6c7 100644
--- a/arch/arm/kernel/perf_callchain.c
+++ b/arch/arm/kernel/perf_callchain.c
@@ -64,7 +64,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, 
struct pt_regs *regs
 {
struct frame_tail __user *tail;
 
-   if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+   if (perf_guest_cbs && perf_guest_cbs->state()) {
/* We don't support guest os callchain now */
return;
}
@@ -100,7 +100,7 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx 
*entry, struct pt_regs *re
 {
struct stackframe fr;
 
-   if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+   if (perf_guest_cbs && perf_guest_cbs->state()) {
/* We don't support guest os callchain now */
return;
}
@@ -111,8 +111,8 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx 
*entry, struct pt_regs *re
 
 unsigned long perf_instruction_pointer(struct pt_regs *regs)
 {
-   if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
-   return perf_guest_cbs->get_guest_ip();
+   if (perf_guest_cbs && perf_guest_cbs->state())
+   return perf_guest_cbs->get_ip();
 
return instruction_pointer(regs);
 }
@@ -120,9 +120,13 @@ unsigned long perf_instruction_pointer(struct pt_regs 
*regs)
 unsigned long perf_misc_flags(struct pt_regs *regs)
 {
int misc = 0;
+   unsigned int state = 0;
 
-   if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
-   if (perf_guest_cbs->is_user_mode())
+   if (perf_guest_cbs)
+   state = perf_guest_cbs->state();
+
+   if (perf_guest_cbs && state) {
+   if (state & PERF_GUEST_USER)
misc |= PERF_RECORD_MISC_GUEST_USER;
else
misc |= PERF_RECORD_MISC_GUEST_KERNEL;
diff --git a/arch/arm64/kernel/perf_callchain.c 
b/arch/arm64/kernel/perf_callchain.c
index 4a72c2727309..1b344e23fd2f 100644
--- a/arch/arm64/kernel/perf_callchain.c
+++ b/arch/arm64/kernel/perf_callchain.c
@@ -5,6 +5,7 @@
  * Copyright (C) 2015 ARM Limited
  */
 #include 
+#include 
 #include 
 
 #include 
@@ -99,10 +100,25 @@ compat_user_backtrace(struct compat_frame_tail __user 
*tail,
 }
 #endif /* CONFIG_COMPAT */
 
+DEFINE_STATIC_CALL_RET0(arm64_guest_state, *(perf_guest_cbs->state));
+DEFINE_STATIC_CALL_RET0(arm64_guest_get_ip, *(perf_guest_cbs->get_ip));
+
+void arch_perf_update_guest_cbs(void)
+{
+   static_call_update(arm64_guest_state, (void *)&__static_call_return0);
+   static_call_update(arm64_guest_get_ip, (void *)&__static_call_return0);
+
+   if (perf_guest_cbs && perf_guest_cbs->state)
+   static_call_update(arm64_guest_state, perf_guest_cbs->state);
+
+   if (perf_guest_cbs && perf_guest_cbs->get_ip)
+   static_call_update(arm64_guest_get_ip, perf_guest_cbs->get_ip);
+}
+
 void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
 struct pt_regs *regs)
 {
-   if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+   if (static_call(arm64_guest_state)()) {
/* We don't support guest os 

Re: [PATCH v2] memblock: make memblock_find_in_range method private

2021-08-06 Thread Rafael J. Wysocki
On Mon, Aug 2, 2021 at 8:37 AM Mike Rapoport  wrote:
>
> From: Mike Rapoport 
>
> There are a lot of uses of memblock_find_in_range() along with
> memblock_reserve() from the times when the memblock allocation APIs did
> not exist.
>
> memblock_find_in_range() is the very core of memblock allocations, so any
> future changes to its internal behaviour would mandate updates of all the
> users outside memblock.
>
> Replace the calls to memblock_find_in_range() with equivalent calls to
> memblock_phys_alloc() and memblock_phys_alloc_range() and make
> memblock_find_in_range() a private method of memblock.
>
> This simplifies the callers, ensures that (unlikely) errors in
> memblock_reserve() are handled and improves maintainability of
> memblock_find_in_range().
>
> Signed-off-by: Mike Rapoport 

For the ACPI part:

Acked-by: Rafael J. Wysocki 

> ---
> v2: don't change error message in arm::reserve_crashkernel(), per Russell
> v1: https://lore.kernel.org/lkml/20210730104039.7047-1-r...@kernel.org
>
>  arch/arm/kernel/setup.c   | 18 +
>  arch/arm64/kvm/hyp/reserved_mem.c |  9 +++
>  arch/arm64/mm/init.c  | 36 -
>  arch/mips/kernel/setup.c  | 14 +-
>  arch/riscv/mm/init.c  | 44 ++-
>  arch/s390/kernel/setup.c  | 10 ---
>  arch/x86/kernel/aperture_64.c |  5 ++--
>  arch/x86/mm/init.c| 21 +--
>  arch/x86/mm/numa.c|  5 ++--
>  arch/x86/mm/numa_emulation.c  |  5 ++--
>  arch/x86/realmode/init.c  |  2 +-
>  drivers/acpi/tables.c |  5 ++--
>  drivers/base/arch_numa.c  |  5 +---
>  drivers/of/of_reserved_mem.c  | 12 ++---
>  include/linux/memblock.h  |  2 --
>  mm/memblock.c |  2 +-
>  16 files changed, 78 insertions(+), 117 deletions(-)
>
> diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
> index f97eb2371672..67f5421b2af7 100644
> --- a/arch/arm/kernel/setup.c
> +++ b/arch/arm/kernel/setup.c
> @@ -1012,31 +1012,25 @@ static void __init reserve_crashkernel(void)
> unsigned long long lowmem_max = __pa(high_memory - 1) + 1;
> if (crash_max > lowmem_max)
> crash_max = lowmem_max;
> -   crash_base = memblock_find_in_range(CRASH_ALIGN, crash_max,
> -   crash_size, CRASH_ALIGN);
> +
> +   crash_base = memblock_phys_alloc_range(crash_size, 
> CRASH_ALIGN,
> +  CRASH_ALIGN, 
> crash_max);
> if (!crash_base) {
> pr_err("crashkernel reservation failed - No suitable 
> area found.\n");
> return;
> }
> } else {
> +   unsigned long long crash_max = crash_base + crash_size;
> unsigned long long start;
>
> -   start = memblock_find_in_range(crash_base,
> -  crash_base + crash_size,
> -  crash_size, SECTION_SIZE);
> +   start = memblock_phys_alloc_range(crash_size, SECTION_SIZE,
> + crash_base, crash_max);
> if (start != crash_base) {
> pr_err("crashkernel reservation failed - memory is in 
> use.\n");
> return;
> }
> }
>
> -   ret = memblock_reserve(crash_base, crash_size);
> -   if (ret < 0) {
> -   pr_warn("crashkernel reservation failed - memory is in use 
> (0x%lx)\n",
> -   (unsigned long)crash_base);
> -   return;
> -   }
> -
> pr_info("Reserving %ldMB of memory at %ldMB for crashkernel (System 
> RAM: %ldMB)\n",
> (unsigned long)(crash_size >> 20),
> (unsigned long)(crash_base >> 20),
> diff --git a/arch/arm64/kvm/hyp/reserved_mem.c 
> b/arch/arm64/kvm/hyp/reserved_mem.c
> index d654921dd09b..578670e3f608 100644
> --- a/arch/arm64/kvm/hyp/reserved_mem.c
> +++ b/arch/arm64/kvm/hyp/reserved_mem.c
> @@ -92,12 +92,10 @@ void __init kvm_hyp_reserve(void)
>  * this is unmapped from the host stage-2, and fallback to PAGE_SIZE.
>  */
> hyp_mem_size = hyp_mem_pages << PAGE_SHIFT;
> -   hyp_mem_base = memblock_find_in_range(0, memblock_end_of_DRAM(),
> - ALIGN(hyp_mem_size, PMD_SIZE),
> - PMD_SIZE);
> +   hyp_mem_base = memblock_phys_alloc(ALIGN(hyp_mem_size, PMD_SIZE),
> +  PMD_SIZE);
> if (!hyp_mem_base)
> -   hyp_mem_base = memblock_find_in_range(0, 
> memblock_end_of_DRAM(),
> - hyp_mem_size, 
> PAGE_SIZE);
> +   

Re: [PATCH 1/4] arm64: mm: Fix TLBI vs ASID rollover

2021-08-06 Thread Catalin Marinas
On Fri, Aug 06, 2021 at 01:42:42PM +0100, Will Deacon wrote:
> On Fri, Aug 06, 2021 at 12:59:28PM +0100, Catalin Marinas wrote:
> > On Fri, Aug 06, 2021 at 12:31:04PM +0100, Will Deacon wrote:
> > > diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
> > > index 75beffe2ee8a..e9c30859f80c 100644
> > > --- a/arch/arm64/include/asm/mmu.h
> > > +++ b/arch/arm64/include/asm/mmu.h
> > > @@ -27,11 +27,32 @@ typedef struct {
> > >  } mm_context_t;
> > >  
> > >  /*
> > > - * This macro is only used by the TLBI and low-level switch_mm() code,
> > > - * neither of which can race with an ASID change. We therefore don't
> > > - * need to reload the counter using atomic64_read().
> > > + * We use atomic64_read() here because the ASID for an 'mm_struct' can
> > > + * be reallocated when scheduling one of its threads following a
> > > + * rollover event (see new_context() and flush_context()). In this case,
> > > + * a concurrent TLBI (e.g. via try_to_unmap_one() and ptep_clear_flush())
> > > + * may use a stale ASID. This is fine in principle as the new ASID is
> > > + * guaranteed to be clean in the TLB, but the TLBI routines have to take
> > > + * care to handle the following race:
> > > + *
> > > + *    CPU 0                    CPU 1                          CPU 2
> > > + *
> > > + *    // ptep_clear_flush(mm)
> > > + *    xchg_relaxed(pte, 0)
> > > + *    DSB ISHST
> > > + *    old = ASID(mm)
> > 
> > We'd need specs clarified (ARM ARM, cat model) that the DSB ISHST is
> > sufficient to order the pte write with the subsequent ASID read.
> 
> Although I agree that the cat model needs updating and also that the Arm
> ARM isn't helpful by trying to define DMB and DSB at the same time, it
> does clearly state the following:
> 
>   // B2-149
>   | A DSB instruction executed by a PE, PEe, completes when all of the
>   | following apply:
>   |
>   | * All explicit memory accesses of the required access types appearing
>   |   in program order before the DSB are complete for the set of observers
>   |   in the required shareability domain.
> 
>   [...]
> 
>   // B2-150
>   | In addition, no instruction that appears in program order after the
>   | DSB instruction can alter any state of the system or perform any part
>   | of its functionality until the DSB completes other than:
>   |
>   | * Being fetched from memory and decoded.
>   | * Reading the general-purpose, SIMD and floating-point, Special-purpose,
>   |   or System registers that are directly or indirectly read without
>   |   causing side-effects.
> 
> Which means that the ASID read cannot return its data before the DSB ISHST
> has completed and the DSB ISHST cannot complete until the PTE write has
> completed.

Thanks for the explanation.

> > Otherwise the patch looks fine to me:
> > 
> > Reviewed-by: Catalin Marinas 
> 
> Thanks! Do you want to queue it for 5.15? I don't think there's a need to
> rush it into 5.14 given that we don't have any evidence of it happening
> in practice.

Happy to queue it for 5.15.

-- 
Catalin


Re: [PATCH 1/4] arm64: mm: Fix TLBI vs ASID rollover

2021-08-06 Thread Will Deacon
On Fri, Aug 06, 2021 at 12:59:28PM +0100, Catalin Marinas wrote:
> On Fri, Aug 06, 2021 at 12:31:04PM +0100, Will Deacon wrote:
> > diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
> > index 75beffe2ee8a..e9c30859f80c 100644
> > --- a/arch/arm64/include/asm/mmu.h
> > +++ b/arch/arm64/include/asm/mmu.h
> > @@ -27,11 +27,32 @@ typedef struct {
> >  } mm_context_t;
> >  
> >  /*
> > - * This macro is only used by the TLBI and low-level switch_mm() code,
> > - * neither of which can race with an ASID change. We therefore don't
> > - * need to reload the counter using atomic64_read().
> > + * We use atomic64_read() here because the ASID for an 'mm_struct' can
> > + * be reallocated when scheduling one of its threads following a
> > + * rollover event (see new_context() and flush_context()). In this case,
> > + * a concurrent TLBI (e.g. via try_to_unmap_one() and ptep_clear_flush())
> > + * may use a stale ASID. This is fine in principle as the new ASID is
> > + * guaranteed to be clean in the TLB, but the TLBI routines have to take
> > + * care to handle the following race:
> > + *
> > + *    CPU 0                    CPU 1                          CPU 2
> > + *
> > + *    // ptep_clear_flush(mm)
> > + *    xchg_relaxed(pte, 0)
> > + *    DSB ISHST
> > + *    old = ASID(mm)
> 
> We'd need specs clarified (ARM ARM, cat model) that the DSB ISHST is
> sufficient to order the pte write with the subsequent ASID read.

Although I agree that the cat model needs updating and also that the Arm
ARM isn't helpful by trying to define DMB and DSB at the same time, it
does clearly state the following:

  // B2-149
  | A DSB instruction executed by a PE, PEe, completes when all of the
  | following apply:
  |
  | * All explicit memory accesses of the required access types appearing
  |   in program order before the DSB are complete for the set of observers
  |   in the required shareability domain.

  [...]

  // B2-150
  | In addition, no instruction that appears in program order after the
  | DSB instruction can alter any state of the system or perform any part
  | of its functionality until the DSB completes other than:
  |
  | * Being fetched from memory and decoded.
  | * Reading the general-purpose, SIMD and floating-point, Special-purpose,
  |   or System registers that are directly or indirectly read without
  |   causing side-effects.

Which means that the ASID read cannot return its data before the DSB ISHST
has completed and the DSB ISHST cannot complete until the PTE write has
completed.

> Otherwise the patch looks fine to me:
> 
> Reviewed-by: Catalin Marinas 

Thanks! Do you want to queue it for 5.15? I don't think there's a need to
rush it into 5.14 given that we don't have any evidence of it happening
in practice.

Will
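
As a concrete illustration, this is why the patch reads the ASID only
after the DSB in the TLBI helpers, e.g. in flush_tlb_mm():

	unsigned long asid;

	/* Before: nothing orders the preceding page-table update against
	 * the ASID read. */
	asid = __TLBI_VADDR(0, ASID(mm));
	dsb(ishst);
	__tlbi(aside1is, asid);

	/* After: per the DSB semantics quoted above, the ASID read cannot
	 * return its value before the DSB completes, and the DSB cannot
	 * complete before the PTE write has. */
	dsb(ishst);
	asid = __TLBI_VADDR(0, ASID(mm));
	__tlbi(aside1is, asid);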


RE: [PATCH v3 4/4] KVM: arm64: Clear active_vmids on vCPU schedule out

2021-08-06 Thread Shameerali Kolothum Thodi



> -Original Message-
> From: Shameerali Kolothum Thodi
> Sent: 03 August 2021 16:57
> To: 'Will Deacon' 
> Cc: linux-arm-ker...@lists.infradead.org; kvmarm@lists.cs.columbia.edu;
> linux-ker...@vger.kernel.org; m...@kernel.org; catalin.mari...@arm.com;
> james.mo...@arm.com; julien.thierry.k...@gmail.com;
> suzuki.poul...@arm.com; jean-phili...@linaro.org;
> alexandru.eli...@arm.com; qper...@google.com; Linuxarm
> 
> Subject: RE: [PATCH v3 4/4] KVM: arm64: Clear active_vmids on vCPU
> schedule out
> 
> 
> 
> > -Original Message-
> > From: Will Deacon [mailto:w...@kernel.org]
> > Sent: 03 August 2021 16:31
> > To: Shameerali Kolothum Thodi 
> > Cc: linux-arm-ker...@lists.infradead.org; kvmarm@lists.cs.columbia.edu;
> > linux-ker...@vger.kernel.org; m...@kernel.org; catalin.mari...@arm.com;
> > james.mo...@arm.com; julien.thierry.k...@gmail.com;
> > suzuki.poul...@arm.com; jean-phili...@linaro.org;
> > alexandru.eli...@arm.com; qper...@google.com; Linuxarm
> > 
> > Subject: Re: [PATCH v3 4/4] KVM: arm64: Clear active_vmids on vCPU
> > schedule out
> >
> > On Tue, Aug 03, 2021 at 12:55:25PM +, Shameerali Kolothum Thodi
> > wrote:
> > > > > diff --git a/arch/arm64/kvm/vmid.c b/arch/arm64/kvm/vmid.c
> > > > > index 5584e84aed95..5fd51f5445c1 100644
> > > > > --- a/arch/arm64/kvm/vmid.c
> > > > > +++ b/arch/arm64/kvm/vmid.c
> > > > > @@ -116,6 +116,12 @@ static u64 new_vmid(struct kvm_vmid
> > > > *kvm_vmid)
> > > > >   return idx2vmid(vmid) | generation;
> > > > >  }
> > > > >
> > > > > +/* Call with preemption disabled */
> > > > > +void kvm_arm_vmid_clear_active(void)
> > > > > +{
> > > > > + atomic64_set(this_cpu_ptr(&active_vmids), 0);
> > > > > +}
> > > >
> > > > I think this is very broken, as it will force everybody to take the
> > > > slow-path when they see an active_vmid of 0.
> > >
> > > Yes. I have seen that happening in my test setup.
> >
> > Why didn't you say so?!
> 
> Sorry. I thought of getting some performance numbers with and
> without this patch to measure the impact, but didn't quite get the
> time to finish it yet.

These are some test numbers with and without this patch, run on two
different test setups.


a)Test Setup -1
---

Platform: HiSilicon D06 with 128 CPUs, VMID bits = 16
Run 128 VMs concurrently each with 2 vCPUs. Each Guest will execute hackbench
5 times before exiting.

Measurements taken avg. of 10 Runs.

Image: 5.14-rc3
---
  Time (s)        44.43813888
  No. of exits    145,348,264

Image: 5.14-rc3 + vmid-v3
---
  Time (s)        46.59789034
  No. of exits    133,587,307

%diff against 5.14-rc3
  Time:  4.8% more
  Exits: 8% less

Image: 5.14-rc3 + vmid-v3 + Without active_asid clear
---
  Time (s)        44.5031782
  No. of exits    144,443,188

%diff against 5.14-rc3
  Time:  0.15% more
  Exits: 2.42% less

b)Test Setup -2
---

Platform: HiSilicon D06 + Kernel with maxcpus set to 8 and VMID bits set to 4.
Run 40 VMs concurrently each with 2 vCPUs. Each Guest will execute hackbench
5 times before exiting.

Measurements taken avg. of 10 Runs.

Image: 5.14-rc3-vmid4bit
---
  Time (s)        46.19963266
  No. of exits    23,699,546

Image: 5.14-rc3-vmid4bit + vmid-v3
---
  Time (s)        45.83307736
  No. of exits    23,260,203

%diff against 5.14-rc3-vmid4bit
  Time:  0.8% less
  Exits: 1.85% less

Image: 5.14-rc3-vmid4bit + vmid-v3 + Without active_asid clear
---
  Time (s)        44.5031782
  No. of exits    144,443,188

%diff against 5.14-rc3-vmid4bit
  Time:  1.05% less
  Exits: 2.06% less

As expected, the active_asid clear on schedule out is not helping.
But without this patch, the numbers seem to be better than the
vanilla kernel when we force the setup (cpus=8, vmid=4bits)
to perform rollover.

Please let me know your thoughts.

Thanks,
Shameer

> 
> >
> > > > It also doesn't solve the issue I mentioned before, as an active_vmid 
> > > > of 0
> > > > means that the reserved vmid is preserved.
> > > >
> > > > Needs more thought...
> > >
> > > How about we clear all the active_vmids in kvm_arch_free_vm() if it
> > > matches the kvm_vmid->id ? But we may have to hold the lock
> > > there
> >
> > I think we have to be really careful not to run into the "suspended
> > animation" problem described in ae120d9edfe9 ("ARM: 7767/1: let the ASID
> > allocator handle suspended animation") if we go down this road.
> 
> 
> Ok. I will go through that.
> 
> > Maybe something along the lines of:
> >
> > ROLLOVER
> >
> >   * Take lock
> >   * Inc generation
> > => This will force everybody down the slow path
> >   * Record active VMIDs
> >   * Broadcast TLBI
> > => Only active VMIDs can be dirty
> > => Reserve 
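
For context on the "everybody takes the slow path when active_vmid is 0"
concern above, the fast path in this style of allocator is expected to look
roughly like the arm64 ASID allocator's (a sketch only; the VMID-side names
are assumptions about Shameer's series, not code from it):

	u64 vmid, old_active;

	vmid = atomic64_read(&kvm_vmid->id);
	old_active = atomic64_read(this_cpu_ptr(&active_vmids));

	/* Fast path: only taken when the per-cpu active VMID is non-zero
	 * (and of the current generation), so clearing it to 0 on schedule
	 * out pushes every subsequent vcpu load through the slow path. */
	if (old_active && vmid_gen_match(vmid) &&
	    atomic64_cmpxchg_relaxed(this_cpu_ptr(&active_vmids),
				     old_active, vmid))
		return;

	/* Slow path: take the lock, re-check the generation and, if
	 * needed, allocate a new VMID or trigger a rollover. */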

Re: [PATCH 1/4] arm64: mm: Fix TLBI vs ASID rollover

2021-08-06 Thread Catalin Marinas
On Fri, Aug 06, 2021 at 12:31:04PM +0100, Will Deacon wrote:
> diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
> index 75beffe2ee8a..e9c30859f80c 100644
> --- a/arch/arm64/include/asm/mmu.h
> +++ b/arch/arm64/include/asm/mmu.h
> @@ -27,11 +27,32 @@ typedef struct {
>  } mm_context_t;
>  
>  /*
> - * This macro is only used by the TLBI and low-level switch_mm() code,
> - * neither of which can race with an ASID change. We therefore don't
> - * need to reload the counter using atomic64_read().
> + * We use atomic64_read() here because the ASID for an 'mm_struct' can
> + * be reallocated when scheduling one of its threads following a
> + * rollover event (see new_context() and flush_context()). In this case,
> + * a concurrent TLBI (e.g. via try_to_unmap_one() and ptep_clear_flush())
> + * may use a stale ASID. This is fine in principle as the new ASID is
> + * guaranteed to be clean in the TLB, but the TLBI routines have to take
> + * care to handle the following race:
> + *
> + *    CPU 0                    CPU 1                          CPU 2
> + *
> + *    // ptep_clear_flush(mm)
> + *    xchg_relaxed(pte, 0)
> + *    DSB ISHST
> + *    old = ASID(mm)

We'd need specs clarified (ARM ARM, cat model) that the DSB ISHST is
sufficient to order the pte write with the subsequent ASID read.
Otherwise the patch looks fine to me:

Reviewed-by: Catalin Marinas 


Re: [PATCH] of: restricted dma: Don't fail device probe on rmem init failure

2021-08-06 Thread Will Deacon
On Fri, Aug 06, 2021 at 12:31:05PM +0100, Will Deacon wrote:
> If CONFIG_DMA_RESTRICTED_POOL=n then probing a device with a reference
> to a "restricted-dma-pool" will fail with a reasonably cryptic error:
> 
>   | pci-host-generic: probe of 1.pci failed with error -22
> 
> Print a more helpful message in this case and try to continue probing
> the device as we do if the kernel doesn't have the restricted DMA patches
> applied or either CONFIG_OF_ADDRESS or CONFIG_HAS_DMA =n.
> 
> Cc: Claire Chang 
> Cc: Konrad Rzeszutek Wilk 
> Cc: Robin Murphy 
> Cc: Christoph Hellwig 
> Cc: Rob Herring 
> Signed-off-by: Will Deacon 
> ---
>  drivers/of/address.c| 8 
>  drivers/of/device.c | 2 +-
>  drivers/of/of_private.h | 8 +++-
>  3 files changed, 8 insertions(+), 10 deletions(-)

Sorry, didn't mean to send this patch a second time, it was still kicking
around in my tree from yesterday and I accidentally picked it up when
sending my TLBI series.

Please ignore.

Will


[PATCH 4/4] KVM: arm64: Upgrade VMID accesses to {READ,WRITE}_ONCE

2021-08-06 Thread Will Deacon
From: Marc Zyngier 

Since TLB invalidation can run in parallel with VMID allocation,
we need to be careful and avoid any sort of load/store tearing.
Use {READ,WRITE}_ONCE consistently to avoid any surprise.

Cc: Catalin Marinas 
Cc: Jade Alglave 
Cc: Shameer Kolothum 
Signed-off-by: Marc Zyngier 
Signed-off-by: Will Deacon 
---
 arch/arm64/include/asm/kvm_mmu.h  | 7 ++-
 arch/arm64/kvm/arm.c  | 2 +-
 arch/arm64/kvm/hyp/nvhe/mem_protect.c | 4 ++--
 arch/arm64/kvm/mmu.c  | 2 +-
 4 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 934ef0deff9f..5828dd8fa738 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -252,6 +252,11 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, 
gpa_t gpa,
 
 #define kvm_phys_to_vttbr(addr)	phys_to_ttbr(addr)
 
+/*
+ * When this is (directly or indirectly) used on the TLB invalidation
+ * path, we rely on a previously issued DSB so that page table updates
+ * and VMID reads are correctly ordered.
+ */
 static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu)
 {
	struct kvm_vmid *vmid = &mmu->vmid;
@@ -259,7 +264,7 @@ static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu 
*mmu)
u64 cnp = system_supports_cnp() ? VTTBR_CNP_BIT : 0;
 
baddr = mmu->pgd_phys;
-   vmid_field = (u64)vmid->vmid << VTTBR_VMID_SHIFT;
+   vmid_field = (u64)READ_ONCE(vmid->vmid) << VTTBR_VMID_SHIFT;
return kvm_phys_to_vttbr(baddr) | vmid_field | cnp;
 }
 
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index e9a2b8f27792..658f76067f46 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -571,7 +571,7 @@ static void update_vmid(struct kvm_vmid *vmid)
kvm_call_hyp(__kvm_flush_vm_context);
}
 
-   vmid->vmid = kvm_next_vmid;
+   WRITE_ONCE(vmid->vmid, kvm_next_vmid);
kvm_next_vmid++;
kvm_next_vmid &= (1 << kvm_get_vmid_bits()) - 1;
 
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c 
b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index d4e74ca7f876..55ae97a144b8 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -109,8 +109,8 @@ int kvm_host_prepare_stage2(void *pgt_pool_base)
mmu->pgd_phys = __hyp_pa(host_kvm.pgt.pgd);
	mmu->arch = &host_kvm.arch;
	mmu->pgt = &host_kvm.pgt;
-   mmu->vmid.vmid_gen = 0;
-   mmu->vmid.vmid = 0;
+   WRITE_ONCE(mmu->vmid.vmid_gen, 0);
+   WRITE_ONCE(mmu->vmid.vmid, 0);
 
return 0;
 }
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 3155c9e778f0..b1a6eaec28ff 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -485,7 +485,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu 
*mmu)
	mmu->arch = &kvm->arch;
mmu->pgt = pgt;
mmu->pgd_phys = __pa(pgt->pgd);
-   mmu->vmid.vmid_gen = 0;
+   WRITE_ONCE(mmu->vmid.vmid_gen, 0);
return 0;
 
 out_destroy_pgtable:
-- 
2.32.0.605.g8dce9f2422-goog



[PATCH 3/4] KVM: arm64: Convert the host S2 over to __load_guest_stage2()

2021-08-06 Thread Will Deacon
From: Marc Zyngier 

The protected mode relies on a separate helper to load the
S2 context. Move over to the __load_guest_stage2() helper
instead.

Cc: Catalin Marinas 
Cc: Jade Alglave 
Cc: Shameer Kolothum 
Signed-off-by: Marc Zyngier 
Signed-off-by: Will Deacon 
---
 arch/arm64/include/asm/kvm_mmu.h  | 11 +++
 arch/arm64/kvm/hyp/include/nvhe/mem_protect.h |  2 +-
 arch/arm64/kvm/hyp/nvhe/mem_protect.c |  2 +-
 3 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 05e089653a1a..934ef0deff9f 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -267,9 +267,10 @@ static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu 
*mmu)
  * Must be called from hyp code running at EL2 with an updated VTTBR
  * and interrupts disabled.
  */
-static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu, unsigned 
long vtcr)
+static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu,
+   struct kvm_arch *arch)
 {
-   write_sysreg(vtcr, vtcr_el2);
+   write_sysreg(arch->vtcr, vtcr_el2);
write_sysreg(kvm_get_vttbr(mmu), vttbr_el2);
 
/*
@@ -280,12 +281,6 @@ static __always_inline void __load_stage2(struct 
kvm_s2_mmu *mmu, unsigned long
asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
 }
 
-static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu,
-   struct kvm_arch *arch)
-{
-   __load_stage2(mmu, arch->vtcr);
-}
-
 static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu)
 {
return container_of(mmu->arch, struct kvm, arch);
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h 
b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index 9c227d87c36d..a910648bc71b 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -29,7 +29,7 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt);
 static __always_inline void __load_host_stage2(void)
 {
	if (static_branch_likely(&kvm_protected_mode_initialized))
-		__load_stage2(&host_kvm.arch.mmu, host_kvm.arch.vtcr);
+		__load_guest_stage2(&host_kvm.arch.mmu, &host_kvm.arch);
else
write_sysreg(0, vttbr_el2);
 }
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c 
b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index d938ce95d3bd..d4e74ca7f876 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -126,7 +126,7 @@ int __pkvm_prot_finalize(void)
kvm_flush_dcache_to_poc(params, sizeof(*params));
 
write_sysreg(params->hcr_el2, hcr_el2);
-	__load_stage2(&host_kvm.arch.mmu, host_kvm.arch.vtcr);
+	__load_guest_stage2(&host_kvm.arch.mmu, &host_kvm.arch);
 
/*
 * Make sure to have an ISB before the TLB maintenance below but only
-- 
2.32.0.605.g8dce9f2422-goog



[PATCH 2/4] KVM: arm64: Move kern_hyp_va() usage in __load_guest_stage2() into the callers

2021-08-06 Thread Will Deacon
From: Marc Zyngier 

It is a bit awkward to use kern_hyp_va() in __load_guest_stage2(),
especially as the helper is shared between VHE and nVHE.

Instead, move the use of kern_hyp_va() in the nVHE code, and
pass a pointer to the kvm->arch structure instead. Although
this may look a bit awkward, it allows for some further simplification.

Cc: Catalin Marinas 
Cc: Jade Alglave 
Cc: Shameer Kolothum 
Signed-off-by: Marc Zyngier 
Signed-off-by: Will Deacon 
---
 arch/arm64/include/asm/kvm_mmu.h | 5 +++--
 arch/arm64/kvm/hyp/nvhe/switch.c | 4 +++-
 arch/arm64/kvm/hyp/nvhe/tlb.c| 2 +-
 arch/arm64/kvm/hyp/vhe/switch.c  | 2 +-
 arch/arm64/kvm/hyp/vhe/tlb.c | 2 +-
 5 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index b52c5c4b9a3d..05e089653a1a 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -280,9 +280,10 @@ static __always_inline void __load_stage2(struct 
kvm_s2_mmu *mmu, unsigned long
asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
 }
 
-static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu)
+static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu,
+   struct kvm_arch *arch)
 {
-   __load_stage2(mmu, kern_hyp_va(mmu->arch)->vtcr);
+   __load_stage2(mmu, arch->vtcr);
 }
 
 static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu)
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index f7af9688c1f7..e50a49082923 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -170,6 +170,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
 {
struct kvm_cpu_context *host_ctxt;
struct kvm_cpu_context *guest_ctxt;
+   struct kvm_s2_mmu *mmu;
bool pmu_switch_needed;
u64 exit_code;
 
@@ -213,7 +214,8 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
__sysreg32_restore_state(vcpu);
__sysreg_restore_state_nvhe(guest_ctxt);
 
-   __load_guest_stage2(kern_hyp_va(vcpu->arch.hw_mmu));
+   mmu = kern_hyp_va(vcpu->arch.hw_mmu);
+   __load_guest_stage2(mmu, kern_hyp_va(mmu->arch));
__activate_traps(vcpu);
 
__hyp_vgic_restore_state(vcpu);
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
index 38ed0f6f2703..76229407d8f0 100644
--- a/arch/arm64/kvm/hyp/nvhe/tlb.c
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -39,7 +39,7 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
 * ensuring that we always have an ISB, but not two ISBs back
 * to back.
 */
-   __load_guest_stage2(mmu);
+   __load_guest_stage2(mmu, kern_hyp_va(mmu->arch));
asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT));
 }
 
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index b3229924d243..0cb7523a501a 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -128,7 +128,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
 * __load_guest_stage2 configures stage 2 translation, and
 * __activate_traps clear HCR_EL2.TGE (among other things).
 */
-   __load_guest_stage2(vcpu->arch.hw_mmu);
+   __load_guest_stage2(vcpu->arch.hw_mmu, vcpu->arch.hw_mmu->arch);
__activate_traps(vcpu);
 
__kvm_adjust_pc(vcpu);
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
index 66f17349f0c3..5e9fb3989e0b 100644
--- a/arch/arm64/kvm/hyp/vhe/tlb.c
+++ b/arch/arm64/kvm/hyp/vhe/tlb.c
@@ -53,7 +53,7 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
 * place before clearing TGE. __load_guest_stage2() already
 * has an ISB in order to deal with this.
 */
-   __load_guest_stage2(mmu);
+   __load_guest_stage2(mmu, mmu->arch);
val = read_sysreg(hcr_el2);
val &= ~HCR_TGE;
write_sysreg(val, hcr_el2);
-- 
2.32.0.605.g8dce9f2422-goog
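
Condensed, the calling-convention change above is:

	/* Before: the helper resolved the EL2 alias of mmu->arch itself,
	 * which only makes sense for nVHE callers. */
	__load_guest_stage2(mmu);

	/* After: each caller passes the kvm_arch pointer it can actually
	 * dereference in its own context. */
	__load_guest_stage2(mmu, kern_hyp_va(mmu->arch));		/* nVHE hyp */
	__load_guest_stage2(vcpu->arch.hw_mmu, vcpu->arch.hw_mmu->arch);	/* VHE */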



[PATCH] of: restricted dma: Don't fail device probe on rmem init failure

2021-08-06 Thread Will Deacon
If CONFIG_DMA_RESTRICTED_POOL=n then probing a device with a reference
to a "restricted-dma-pool" will fail with a reasonably cryptic error:

  | pci-host-generic: probe of 1.pci failed with error -22

Print a more helpful message in this case and try to continue probing
the device as we do if the kernel doesn't have the restricted DMA patches
applied or either CONFIG_OF_ADDRESS or CONFIG_HAS_DMA =n.

Cc: Claire Chang 
Cc: Konrad Rzeszutek Wilk 
Cc: Robin Murphy 
Cc: Christoph Hellwig 
Cc: Rob Herring 
Signed-off-by: Will Deacon 
---
 drivers/of/address.c| 8 
 drivers/of/device.c | 2 +-
 drivers/of/of_private.h | 8 +++-
 3 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/drivers/of/address.c b/drivers/of/address.c
index 973257434398..f6bf4b423c2a 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -997,7 +997,7 @@ int of_dma_get_range(struct device_node *np, const struct 
bus_dma_region **map)
return ret;
 }
 
-int of_dma_set_restricted_buffer(struct device *dev, struct device_node *np)
+void of_dma_set_restricted_buffer(struct device *dev, struct device_node *np)
 {
struct device_node *node, *of_node = dev->of_node;
int count, i;
@@ -1022,11 +1022,11 @@ int of_dma_set_restricted_buffer(struct device *dev, 
struct device_node *np)
 */
if (of_device_is_compatible(node, "restricted-dma-pool") &&
of_device_is_available(node))
-   return of_reserved_mem_device_init_by_idx(dev, of_node,
- i);
+   break;
}
 
-   return 0;
+   if (i != count && of_reserved_mem_device_init_by_idx(dev, of_node, i))
+   dev_warn(dev, "failed to initialise \"restricted-dma-pool\" 
memory node\n");
 }
 #endif /* CONFIG_HAS_DMA */
 
diff --git a/drivers/of/device.c b/drivers/of/device.c
index 2defdca418ec..258a2b099410 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -166,7 +166,7 @@ int of_dma_configure_id(struct device *dev, struct 
device_node *np,
arch_setup_dma_ops(dev, dma_start, size, iommu, coherent);
 
if (!iommu)
-   return of_dma_set_restricted_buffer(dev, np);
+   of_dma_set_restricted_buffer(dev, np);
 
return 0;
 }
diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h
index f557bd22b0cf..bc883f69496b 100644
--- a/drivers/of/of_private.h
+++ b/drivers/of/of_private.h
@@ -163,18 +163,16 @@ struct bus_dma_region;
 #if defined(CONFIG_OF_ADDRESS) && defined(CONFIG_HAS_DMA)
 int of_dma_get_range(struct device_node *np,
const struct bus_dma_region **map);
-int of_dma_set_restricted_buffer(struct device *dev, struct device_node *np);
+void of_dma_set_restricted_buffer(struct device *dev, struct device_node *np);
 #else
 static inline int of_dma_get_range(struct device_node *np,
const struct bus_dma_region **map)
 {
return -ENODEV;
 }
-static inline int of_dma_set_restricted_buffer(struct device *dev,
-  struct device_node *np)
+static inline void of_dma_set_restricted_buffer(struct device *dev,
+   struct device_node *np)
 {
-   /* Do nothing, successfully. */
-   return 0;
 }
 #endif
 
-- 
2.32.0.605.g8dce9f2422-goog



[PATCH 1/4] arm64: mm: Fix TLBI vs ASID rollover

2021-08-06 Thread Will Deacon
When switching to an 'mm_struct' for the first time following an ASID
rollover, a new ASID may be allocated and assigned to 'mm->context.id'.
This reassignment can happen concurrently with other operations on the
mm, such as unmapping pages and subsequently issuing TLB invalidation.

Consequently, we need to ensure that (a) accesses to 'mm->context.id'
are atomic and (b) all page-table updates made prior to a TLBI using the
old ASID are guaranteed to be visible to CPUs running with the new ASID.

This was found by inspection after reviewing the VMID changes from
Shameer but it looks like a real (yet hard to hit) bug.

Cc: 
Cc: Catalin Marinas 
Cc: Marc Zyngier 
Cc: Jade Alglave 
Cc: Shameer Kolothum 
Signed-off-by: Will Deacon 
---
 arch/arm64/include/asm/mmu.h  | 29 +
 arch/arm64/include/asm/tlbflush.h | 11 ++-
 2 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 75beffe2ee8a..e9c30859f80c 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -27,11 +27,32 @@ typedef struct {
 } mm_context_t;
 
 /*
- * This macro is only used by the TLBI and low-level switch_mm() code,
- * neither of which can race with an ASID change. We therefore don't
- * need to reload the counter using atomic64_read().
+ * We use atomic64_read() here because the ASID for an 'mm_struct' can
+ * be reallocated when scheduling one of its threads following a
+ * rollover event (see new_context() and flush_context()). In this case,
+ * a concurrent TLBI (e.g. via try_to_unmap_one() and ptep_clear_flush())
+ * may use a stale ASID. This is fine in principle as the new ASID is
+ * guaranteed to be clean in the TLB, but the TLBI routines have to take
+ * care to handle the following race:
+ *
+ *    CPU 0                    CPU 1                          CPU 2
+ *
+ *    // ptep_clear_flush(mm)
+ *    xchg_relaxed(pte, 0)
+ *    DSB ISHST
+ *    old = ASID(mm)
+ *         |                   <rollover>
+ *         |                   new = new_context(mm)
+ *         \-----------------> atomic_set(mm->context.id, new)
+ *                             cpu_switch_mm(mm)
+ *                             // Hardware walk of pte using new ASID
+ *    TLBI(old)
+ *
+ * In this scenario, the barrier on CPU 0 and the dependency on CPU 1
+ * ensure that the page-table walker on CPU 1 *must* see the invalid PTE
+ * written by CPU 0.
  */
-#define ASID(mm)   ((mm)->context.id.counter & 0xffff)
+#define ASID(mm)   (atomic64_read(&(mm)->context.id) & 0xffff)
 
 static inline bool arm64_kernel_unmapped_at_el0(void)
 {
diff --git a/arch/arm64/include/asm/tlbflush.h 
b/arch/arm64/include/asm/tlbflush.h
index cc3f5a33ff9c..36f02892e1df 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -245,9 +245,10 @@ static inline void flush_tlb_all(void)
 
 static inline void flush_tlb_mm(struct mm_struct *mm)
 {
-   unsigned long asid = __TLBI_VADDR(0, ASID(mm));
+   unsigned long asid;
 
dsb(ishst);
+   asid = __TLBI_VADDR(0, ASID(mm));
__tlbi(aside1is, asid);
__tlbi_user(aside1is, asid);
dsb(ish);
@@ -256,9 +257,10 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
 static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
 unsigned long uaddr)
 {
-   unsigned long addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm));
+   unsigned long addr;
 
dsb(ishst);
+   addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm));
__tlbi(vale1is, addr);
__tlbi_user(vale1is, addr);
 }
@@ -283,9 +285,7 @@ static inline void __flush_tlb_range(struct vm_area_struct 
*vma,
 {
int num = 0;
int scale = 0;
-   unsigned long asid = ASID(vma->vm_mm);
-   unsigned long addr;
-   unsigned long pages;
+   unsigned long asid, addr, pages;
 
start = round_down(start, stride);
end = round_up(end, stride);
@@ -305,6 +305,7 @@ static inline void __flush_tlb_range(struct vm_area_struct 
*vma,
}
 
dsb(ishst);
+   asid = ASID(vma->vm_mm);
 
/*
 * When the CPU does not support TLB range operations, flush the TLB
-- 
2.32.0.605.g8dce9f2422-goog



[PATCH 0/4] Fix racing TLBI with ASID/VMID reallocation

2021-08-06 Thread Will Deacon
Hi all,

While reviewing Shameer's reworked VMID allocator [1] and discussing
with Marc, we spotted a race between TLB invalidation (which typically
takes an ASID or VMID argument) and reallocation of ASID/VMID for the
context being targeted.

The first patch spells out an example with try_to_unmap_one() in a
comment, which Catalin has kindly modelled in TLA+ at [2].

Although I'm posting all this together for ease of review, the intention
is that the first patch will go via arm64 with the latter going via kvm.

Cheers,

Will

[1] 
https://lore.kernel.org/r/20210729104009.382-1-shameerali.kolothum.th...@huawei.com
[2] 
https://git.kernel.org/pub/scm/linux/kernel/git/cmarinas/kernel-tla.git/commit/

Cc: Catalin Marinas 
Cc: Marc Zyngier 
Cc: Jade Alglave 
Cc: Shameer Kolothum 
Cc: 
Cc: 

--->8

Marc Zyngier (3):
  KVM: arm64: Move kern_hyp_va() usage in __load_guest_stage2() into the
callers
  KVM: arm64: Convert the host S2 over to __load_guest_stage2()
  KVM: arm64: Upgrade VMID accesses to {READ,WRITE}_ONCE

Will Deacon (1):
  arm64: mm: Fix TLBI vs ASID rollover

 arch/arm64/include/asm/kvm_mmu.h  | 17 ++-
 arch/arm64/include/asm/mmu.h  | 29 ---
 arch/arm64/include/asm/tlbflush.h | 11 +++
 arch/arm64/kvm/arm.c  |  2 +-
 arch/arm64/kvm/hyp/include/nvhe/mem_protect.h |  2 +-
 arch/arm64/kvm/hyp/nvhe/mem_protect.c |  6 ++--
 arch/arm64/kvm/hyp/nvhe/switch.c  |  4 ++-
 arch/arm64/kvm/hyp/nvhe/tlb.c |  2 +-
 arch/arm64/kvm/hyp/vhe/switch.c   |  2 +-
 arch/arm64/kvm/hyp/vhe/tlb.c  |  2 +-
 arch/arm64/kvm/mmu.c  |  2 +-
 11 files changed, 52 insertions(+), 27 deletions(-)

-- 
2.32.0.605.g8dce9f2422-goog



[RFC PATCH 14/15] mm: introduce MIN_MAX_ORDER to replace MAX_ORDER as compile time constant.

2021-08-06 Thread Zi Yan
From: Zi Yan 

For other MAX_ORDER uses (described below), there is either no need or
too much hassle to convert certain static arrays to dynamic ones. Add
MIN_MAX_ORDER to serve as a compile time constant in place of MAX_ORDER.

The ARM64 hypervisor maintains its own free page list and does not import
any core kernel symbols, so the soon-to-be runtime variable MAX_ORDER is
not accessible in ARM64 hypervisor code. There is also no need to allocate
very large pages there.

In SLAB/SLOB/SLUB, the 2-D array kmalloc_caches uses MAX_ORDER in its
second dimension, and it is too much hassle to allocate memory for
kmalloc_caches before any proper memory allocator is set up.

Signed-off-by: Zi Yan 
Cc: Marc Zyngier 
Cc: Catalin Marinas 
Cc: Christoph Lameter 
Cc: Vlastimil Babka 
Cc: Quentin Perret 
Cc: linux-arm-ker...@lists.infradead.org
Cc: kvmarm@lists.cs.columbia.edu
Cc: linux...@kvack.org
Cc: linux-ker...@vger.kernel.org
---
 arch/arm64/kvm/hyp/include/nvhe/gfp.h | 2 +-
 arch/arm64/kvm/hyp/nvhe/page_alloc.c  | 3 ++-
 include/linux/mmzone.h| 3 +++
 include/linux/slab.h  | 8 
 mm/slab.c | 2 +-
 mm/slub.c | 7 ---
 6 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h 
b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
index fb0f523d1492..c774b4a98336 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/gfp.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
@@ -16,7 +16,7 @@ struct hyp_pool {
 * API at EL2.
 */
hyp_spinlock_t lock;
-   struct list_head free_area[MAX_ORDER];
+   struct list_head free_area[MIN_MAX_ORDER];
phys_addr_t range_start;
phys_addr_t range_end;
unsigned short max_order;
diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c 
b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
index 41fc25bdfb34..a1cc1b648de0 100644
--- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c
+++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
@@ -226,7 +226,8 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned 
int nr_pages,
int i;
 
	hyp_spin_lock_init(&pool->lock);
-   pool->max_order = min(MAX_ORDER, get_order(nr_pages << PAGE_SHIFT));
+
+   pool->max_order = min(MIN_MAX_ORDER, get_order(nr_pages << PAGE_SHIFT));
for (i = 0; i < pool->max_order; i++)
		INIT_LIST_HEAD(&pool->free_area[i]);
pool->range_start = phys;
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 09aafc05aef4..379dada82d4b 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -27,11 +27,14 @@
 #ifndef CONFIG_ARCH_FORCE_MAX_ORDER
 #ifdef CONFIG_SET_MAX_ORDER
 #define MAX_ORDER CONFIG_SET_MAX_ORDER
+#define MIN_MAX_ORDER CONFIG_SET_MAX_ORDER
 #else
 #define MAX_ORDER 11
+#define MIN_MAX_ORDER MAX_ORDER
 #endif /* CONFIG_SET_MAX_ORDER */
 #else
 #define MAX_ORDER CONFIG_ARCH_FORCE_MAX_ORDER
+#define MIN_MAX_ORDER CONFIG_ARCH_FORCE_MAX_ORDER
 #endif /* CONFIG_ARCH_FORCE_MAX_ORDER */
 #define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1))
 
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 2c0d80cca6b8..d8747c158db6 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -244,8 +244,8 @@ static inline void __check_heap_object(const void *ptr, 
unsigned long n,
  * to do various tricks to work around compiler limitations in order to
  * ensure proper constant folding.
  */
-#define KMALLOC_SHIFT_HIGH ((MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? \
-   (MAX_ORDER + PAGE_SHIFT - 1) : 25)
+#define KMALLOC_SHIFT_HIGH ((MIN_MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? \
+   (MIN_MAX_ORDER + PAGE_SHIFT - 1) : 25)
 #define KMALLOC_SHIFT_MAX  KMALLOC_SHIFT_HIGH
 #ifndef KMALLOC_SHIFT_LOW
 #define KMALLOC_SHIFT_LOW  5
@@ -258,7 +258,7 @@ static inline void __check_heap_object(const void *ptr, 
unsigned long n,
  * (PAGE_SIZE*2).  Larger requests are passed to the page allocator.
  */
 #define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1)
-#define KMALLOC_SHIFT_MAX  (MAX_ORDER + PAGE_SHIFT - 1)
+#define KMALLOC_SHIFT_MAX  (MIN_MAX_ORDER + PAGE_SHIFT - 1)
 #ifndef KMALLOC_SHIFT_LOW
 #define KMALLOC_SHIFT_LOW  3
 #endif
@@ -271,7 +271,7 @@ static inline void __check_heap_object(const void *ptr, 
unsigned long n,
  * be allocated from the same page.
  */
 #define KMALLOC_SHIFT_HIGH PAGE_SHIFT
-#define KMALLOC_SHIFT_MAX  (MAX_ORDER + PAGE_SHIFT - 1)
+#define KMALLOC_SHIFT_MAX  (MIN_MAX_ORDER + PAGE_SHIFT - 1)
 #ifndef KMALLOC_SHIFT_LOW
 #define KMALLOC_SHIFT_LOW  3
 #endif
diff --git a/mm/slab.c b/mm/slab.c
index d0f725637663..0041de8ec0e9 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -466,7 +466,7 @@ static int __init slab_max_order_setup(char *str)
 {
	get_option(&str, &slab_max_order);
slab_max_order = slab_max_order < 0 ? 0 :
-   min(slab_max_order, MAX_ORDER - 1);
+   min(slab_max_order, MIN_MAX_ORDER - 1);
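
Schematically, the MAX_ORDER / MIN_MAX_ORDER split used in the patch above
boils down to keeping a compile-time bound wherever one is required:

/* MAX_ORDER may become a runtime variable later in this series, so code
 * that needs a constant (static array bounds, constant folding in the
 * kmalloc limits, EL2 code that cannot reference kernel variables) uses
 * MIN_MAX_ORDER, which always expands to the configured constant: */
struct list_head free_area[MIN_MAX_ORDER];	/* still a constant bound */
#define KMALLOC_SHIFT_MAX	(MIN_MAX_ORDER + PAGE_SHIFT - 1)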