Re: [PATCH v2] memblock: make memblock_find_in_range method private
On Mon, Aug 02, 2021 at 09:37:37AM +0300, Mike Rapoport wrote: > From: Mike Rapoport > > There are a lot of uses of memblock_find_in_range() along with > memblock_reserve() from the times memblock allocation APIs did not exist. > > memblock_find_in_range() is the very core of memblock allocations, so any > future changes to its internal behaviour would mandate updates of all the > users outside memblock. > > Replace the calls to memblock_find_in_range() with an equivalent calls to > memblock_phys_alloc() and memblock_phys_alloc_range() and make > memblock_find_in_range() private method of memblock. > > This simplifies the callers, ensures that (unlikely) errors in > memblock_reserve() are handled and improves maintainability of > memblock_find_in_range(). > > Signed-off-by: Mike Rapoport Acked-by: Russell King (Oracle) Thanks. -- RMK's Patch system: https://www.armlinux.org.uk/developer/patches/ FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last! ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Re: [PATCH 4/4] KVM: arm64: Upgrade VMID accesses to {READ,WRITE}_ONCE
On Friday 06 Aug 2021 at 12:31:08 (+0100), Will Deacon wrote: > From: Marc Zyngier > > Since TLB invalidation can run in parallel with VMID allocation, > we need to be careful and avoid any sort of load/store tearing. > Use {READ,WRITE}_ONCE consistently to avoid any surprise. > > Cc: Catalin Marinas > Cc: Jade Alglave > Cc: Shameer Kolothum > Signed-off-by: Marc Zyngier > Signed-off-by: Will Deacon > --- > arch/arm64/include/asm/kvm_mmu.h | 7 ++- > arch/arm64/kvm/arm.c | 2 +- > arch/arm64/kvm/hyp/nvhe/mem_protect.c | 4 ++-- > arch/arm64/kvm/mmu.c | 2 +- > 4 files changed, 10 insertions(+), 5 deletions(-) > > diff --git a/arch/arm64/include/asm/kvm_mmu.h > b/arch/arm64/include/asm/kvm_mmu.h > index 934ef0deff9f..5828dd8fa738 100644 > --- a/arch/arm64/include/asm/kvm_mmu.h > +++ b/arch/arm64/include/asm/kvm_mmu.h > @@ -252,6 +252,11 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, > gpa_t gpa, > > #define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr) > > +/* > + * When this is (directly or indirectly) used on the TLB invalidation > + * path, we rely on a previously issued DSB so that page table updates > + * and VMID reads are correctly ordered. > + */ > static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu) > { > struct kvm_vmid *vmid = >vmid; > @@ -259,7 +264,7 @@ static __always_inline u64 kvm_get_vttbr(struct > kvm_s2_mmu *mmu) > u64 cnp = system_supports_cnp() ? 
VTTBR_CNP_BIT : 0; > > baddr = mmu->pgd_phys; > - vmid_field = (u64)vmid->vmid << VTTBR_VMID_SHIFT; > + vmid_field = (u64)READ_ONCE(vmid->vmid) << VTTBR_VMID_SHIFT; > return kvm_phys_to_vttbr(baddr) | vmid_field | cnp; > } > > diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c > index e9a2b8f27792..658f76067f46 100644 > --- a/arch/arm64/kvm/arm.c > +++ b/arch/arm64/kvm/arm.c > @@ -571,7 +571,7 @@ static void update_vmid(struct kvm_vmid *vmid) > kvm_call_hyp(__kvm_flush_vm_context); > } > > - vmid->vmid = kvm_next_vmid; > + WRITE_ONCE(vmid->vmid, kvm_next_vmid); > kvm_next_vmid++; > kvm_next_vmid &= (1 << kvm_get_vmid_bits()) - 1; > > diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c > b/arch/arm64/kvm/hyp/nvhe/mem_protect.c > index d4e74ca7f876..55ae97a144b8 100644 > --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c > +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c > @@ -109,8 +109,8 @@ int kvm_host_prepare_stage2(void *pgt_pool_base) > mmu->pgd_phys = __hyp_pa(host_kvm.pgt.pgd); > mmu->arch = _kvm.arch; > mmu->pgt = _kvm.pgt; > - mmu->vmid.vmid_gen = 0; > - mmu->vmid.vmid = 0; > + WRITE_ONCE(mmu->vmid.vmid_gen, 0); > + WRITE_ONCE(mmu->vmid.vmid, 0); I'm guessing it should be safe to omit those? But they certainly don't harm and can serve as documentation anyway, so: Reviewed-by: Quentin Perret Thanks, Quentin > > return 0; > } > diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c > index 3155c9e778f0..b1a6eaec28ff 100644 > --- a/arch/arm64/kvm/mmu.c > +++ b/arch/arm64/kvm/mmu.c > @@ -485,7 +485,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct > kvm_s2_mmu *mmu) > mmu->arch = >arch; > mmu->pgt = pgt; > mmu->pgd_phys = __pa(pgt->pgd); > - mmu->vmid.vmid_gen = 0; > + WRITE_ONCE(mmu->vmid.vmid_gen, 0); > return 0; > > out_destroy_pgtable: > -- > 2.32.0.605.g8dce9f2422-goog > ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Re: [PATCH 3/4] KVM: arm64: Convert the host S2 over to __load_guest_stage2()
On Friday 06 Aug 2021 at 12:31:07 (+0100), Will Deacon wrote: > From: Marc Zyngier > > The protected mode relies on a separate helper to load the > S2 context. Move over to the __load_guest_stage2() helper > instead. > > Cc: Catalin Marinas > Cc: Jade Alglave > Cc: Shameer Kolothum > Signed-off-by: Marc Zyngier > Signed-off-by: Will Deacon > --- > arch/arm64/include/asm/kvm_mmu.h | 11 +++ > arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 2 +- > arch/arm64/kvm/hyp/nvhe/mem_protect.c | 2 +- > 3 files changed, 5 insertions(+), 10 deletions(-) > > diff --git a/arch/arm64/include/asm/kvm_mmu.h > b/arch/arm64/include/asm/kvm_mmu.h > index 05e089653a1a..934ef0deff9f 100644 > --- a/arch/arm64/include/asm/kvm_mmu.h > +++ b/arch/arm64/include/asm/kvm_mmu.h > @@ -267,9 +267,10 @@ static __always_inline u64 kvm_get_vttbr(struct > kvm_s2_mmu *mmu) > * Must be called from hyp code running at EL2 with an updated VTTBR > * and interrupts disabled. > */ > -static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu, unsigned > long vtcr) > +static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu, > + struct kvm_arch *arch) > { > - write_sysreg(vtcr, vtcr_el2); > + write_sysreg(arch->vtcr, vtcr_el2); > write_sysreg(kvm_get_vttbr(mmu), vttbr_el2); > > /* > @@ -280,12 +281,6 @@ static __always_inline void __load_stage2(struct > kvm_s2_mmu *mmu, unsigned long > asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); > } > > -static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu, > - struct kvm_arch *arch) > -{ > - __load_stage2(mmu, arch->vtcr); > -} > - > static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu) > { > return container_of(mmu->arch, struct kvm, arch); > diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h > b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h > index 9c227d87c36d..a910648bc71b 100644 > --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h > +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h > @@ 
-29,7 +29,7 @@ void handle_host_mem_abort(struct kvm_cpu_context > *host_ctxt); > static __always_inline void __load_host_stage2(void) > { > if (static_branch_likely(_protected_mode_initialized)) > - __load_stage2(_kvm.arch.mmu, host_kvm.arch.vtcr); > + __load_guest_stage2(_kvm.arch.mmu, _kvm.arch); > else > write_sysreg(0, vttbr_el2); > } > diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c > b/arch/arm64/kvm/hyp/nvhe/mem_protect.c > index d938ce95d3bd..d4e74ca7f876 100644 > --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c > +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c > @@ -126,7 +126,7 @@ int __pkvm_prot_finalize(void) > kvm_flush_dcache_to_poc(params, sizeof(*params)); > > write_sysreg(params->hcr_el2, hcr_el2); > - __load_stage2(_kvm.arch.mmu, host_kvm.arch.vtcr); > + __load_guest_stage2(_kvm.arch.mmu, _kvm.arch); Nit: clearly we're not loading a guest stage-2 here, so maybe the function should take a more generic name? Thanks, Quentin ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Re: (subset) [PATCH 0/4] Fix racing TLBI with ASID/VMID reallocation
On Fri, 6 Aug 2021 12:31:03 +0100, Will Deacon wrote: > While reviewing Shameer's reworked VMID allocator [1] and discussing > with Marc, we spotted a race between TLB invalidation (which typically > takes an ASID or VMID argument) and reallocation of ASID/VMID for the > context being targeted. > > The first patch spells out an example with try_to_unmap_one() in a > comment, which Catalin has kindly modelled in TLA+ at [2]. > > [...] Applied to arm64 (for-next/misc), thanks! [1/4] arm64: mm: Fix TLBI vs ASID rollover https://git.kernel.org/arm64/c/5e10f9887ed8 -- Catalin ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
[PATCH V10 01/18] perf/core: Use static_call to optimize perf_guest_info_callbacks
From: Like Xu For "struct perf_guest_info_callbacks", the two fields "is_in_guest" and "is_user_mode" are replaced with a new multiplexed member named "state", and the "get_guest_ip" field will be renamed to "get_ip". For arm64, xen and kvm/x86, the application of DEFINE_STATIC_CALL_RET0 could make all that perf_guest_cbs stuff suck less. For arm, csky, nds32, and riscv, just applied some renamed refactoring. Cc: Will Deacon Cc: Marc Zyngier Cc: Guo Ren Cc: Nick Hu Cc: Paul Walmsley Cc: Boris Ostrovsky Cc: linux-arm-ker...@lists.infradead.org Cc: kvmarm@lists.cs.columbia.edu Cc: linux-c...@vger.kernel.org Cc: linux-ri...@lists.infradead.org Cc: xen-de...@lists.xenproject.org Suggested-by: Peter Zijlstra (Intel) Original-by: Peter Zijlstra (Intel) Signed-off-by: Like Xu Signed-off-by: Zhu Lingshan Reviewed-by: Boris Ostrovsky Acked-by: Peter Zijlstra (Intel) --- arch/arm/kernel/perf_callchain.c | 16 +++- arch/arm64/kernel/perf_callchain.c | 29 +- arch/arm64/kvm/perf.c | 22 - arch/csky/kernel/perf_callchain.c | 4 +-- arch/nds32/kernel/perf_event_cpu.c | 16 +++- arch/riscv/kernel/perf_callchain.c | 4 +-- arch/x86/events/core.c | 39 -- arch/x86/events/intel/core.c | 7 +++--- arch/x86/include/asm/kvm_host.h| 2 +- arch/x86/kvm/pmu.c | 2 +- arch/x86/kvm/x86.c | 37 +++- arch/x86/xen/pmu.c | 33 ++--- include/linux/perf_event.h | 12 ++--- kernel/events/core.c | 9 +++ 14 files changed, 144 insertions(+), 88 deletions(-) diff --git a/arch/arm/kernel/perf_callchain.c b/arch/arm/kernel/perf_callchain.c index 3b69a76d341e..1ce30f86d6c7 100644 --- a/arch/arm/kernel/perf_callchain.c +++ b/arch/arm/kernel/perf_callchain.c @@ -64,7 +64,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs { struct frame_tail __user *tail; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (perf_guest_cbs && perf_guest_cbs->state()) { /* We don't support guest os callchain now */ return; } @@ -100,7 +100,7 @@ perf_callchain_kernel(struct 
perf_callchain_entry_ctx *entry, struct pt_regs *re { struct stackframe fr; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (perf_guest_cbs && perf_guest_cbs->state()) { /* We don't support guest os callchain now */ return; } @@ -111,8 +111,8 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re unsigned long perf_instruction_pointer(struct pt_regs *regs) { - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) - return perf_guest_cbs->get_guest_ip(); + if (perf_guest_cbs && perf_guest_cbs->state()) + return perf_guest_cbs->get_ip(); return instruction_pointer(regs); } @@ -120,9 +120,13 @@ unsigned long perf_instruction_pointer(struct pt_regs *regs) unsigned long perf_misc_flags(struct pt_regs *regs) { int misc = 0; + unsigned int state = 0; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - if (perf_guest_cbs->is_user_mode()) + if (perf_guest_cbs) + state = perf_guest_cbs->state(); + + if (perf_guest_cbs && state) { + if (state & PERF_GUEST_USER) misc |= PERF_RECORD_MISC_GUEST_USER; else misc |= PERF_RECORD_MISC_GUEST_KERNEL; diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c index 4a72c2727309..1b344e23fd2f 100644 --- a/arch/arm64/kernel/perf_callchain.c +++ b/arch/arm64/kernel/perf_callchain.c @@ -5,6 +5,7 @@ * Copyright (C) 2015 ARM Limited */ #include +#include #include #include @@ -99,10 +100,25 @@ compat_user_backtrace(struct compat_frame_tail __user *tail, } #endif /* CONFIG_COMPAT */ +DEFINE_STATIC_CALL_RET0(arm64_guest_state, *(perf_guest_cbs->state)); +DEFINE_STATIC_CALL_RET0(arm64_guest_get_ip, *(perf_guest_cbs->get_ip)); + +void arch_perf_update_guest_cbs(void) +{ + static_call_update(arm64_guest_state, (void *)&__static_call_return0); + static_call_update(arm64_guest_get_ip, (void *)&__static_call_return0); + + if (perf_guest_cbs && perf_guest_cbs->state) + static_call_update(arm64_guest_state, perf_guest_cbs->state); + + if (perf_guest_cbs && perf_guest_cbs->get_ip) 
+ static_call_update(arm64_guest_get_ip, perf_guest_cbs->get_ip); +} + void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (static_call(arm64_guest_state)()) { /* We don't support guest os
Re: [PATCH v2] memblock: make memblock_find_in_range method private
On Mon, Aug 2, 2021 at 8:37 AM Mike Rapoport wrote: > > From: Mike Rapoport > > There are a lot of uses of memblock_find_in_range() along with > memblock_reserve() from the times memblock allocation APIs did not exist. > > memblock_find_in_range() is the very core of memblock allocations, so any > future changes to its internal behaviour would mandate updates of all the > users outside memblock. > > Replace the calls to memblock_find_in_range() with an equivalent calls to > memblock_phys_alloc() and memblock_phys_alloc_range() and make > memblock_find_in_range() private method of memblock. > > This simplifies the callers, ensures that (unlikely) errors in > memblock_reserve() are handled and improves maintainability of > memblock_find_in_range(). > > Signed-off-by: Mike Rapoport For the ACPI part: Acked-by: Rafael J. Wysocki > --- > v2: don't change error message in arm::reserve_crashkernel(), per Russell > v1: https://lore.kernel.org/lkml/20210730104039.7047-1-r...@kernel.org > > arch/arm/kernel/setup.c | 18 + > arch/arm64/kvm/hyp/reserved_mem.c | 9 +++ > arch/arm64/mm/init.c | 36 - > arch/mips/kernel/setup.c | 14 +- > arch/riscv/mm/init.c | 44 ++- > arch/s390/kernel/setup.c | 10 --- > arch/x86/kernel/aperture_64.c | 5 ++-- > arch/x86/mm/init.c| 21 +-- > arch/x86/mm/numa.c| 5 ++-- > arch/x86/mm/numa_emulation.c | 5 ++-- > arch/x86/realmode/init.c | 2 +- > drivers/acpi/tables.c | 5 ++-- > drivers/base/arch_numa.c | 5 +--- > drivers/of/of_reserved_mem.c | 12 ++--- > include/linux/memblock.h | 2 -- > mm/memblock.c | 2 +- > 16 files changed, 78 insertions(+), 117 deletions(-) > > diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c > index f97eb2371672..67f5421b2af7 100644 > --- a/arch/arm/kernel/setup.c > +++ b/arch/arm/kernel/setup.c > @@ -1012,31 +1012,25 @@ static void __init reserve_crashkernel(void) > unsigned long long lowmem_max = __pa(high_memory - 1) + 1; > if (crash_max > lowmem_max) > crash_max = lowmem_max; > - crash_base = 
memblock_find_in_range(CRASH_ALIGN, crash_max, > - crash_size, CRASH_ALIGN); > + > + crash_base = memblock_phys_alloc_range(crash_size, > CRASH_ALIGN, > + CRASH_ALIGN, > crash_max); > if (!crash_base) { > pr_err("crashkernel reservation failed - No suitable > area found.\n"); > return; > } > } else { > + unsigned long long crash_max = crash_base + crash_size; > unsigned long long start; > > - start = memblock_find_in_range(crash_base, > - crash_base + crash_size, > - crash_size, SECTION_SIZE); > + start = memblock_phys_alloc_range(crash_size, SECTION_SIZE, > + crash_base, crash_max); > if (start != crash_base) { > pr_err("crashkernel reservation failed - memory is in > use.\n"); > return; > } > } > > - ret = memblock_reserve(crash_base, crash_size); > - if (ret < 0) { > - pr_warn("crashkernel reservation failed - memory is in use > (0x%lx)\n", > - (unsigned long)crash_base); > - return; > - } > - > pr_info("Reserving %ldMB of memory at %ldMB for crashkernel (System > RAM: %ldMB)\n", > (unsigned long)(crash_size >> 20), > (unsigned long)(crash_base >> 20), > diff --git a/arch/arm64/kvm/hyp/reserved_mem.c > b/arch/arm64/kvm/hyp/reserved_mem.c > index d654921dd09b..578670e3f608 100644 > --- a/arch/arm64/kvm/hyp/reserved_mem.c > +++ b/arch/arm64/kvm/hyp/reserved_mem.c > @@ -92,12 +92,10 @@ void __init kvm_hyp_reserve(void) > * this is unmapped from the host stage-2, and fallback to PAGE_SIZE. > */ > hyp_mem_size = hyp_mem_pages << PAGE_SHIFT; > - hyp_mem_base = memblock_find_in_range(0, memblock_end_of_DRAM(), > - ALIGN(hyp_mem_size, PMD_SIZE), > - PMD_SIZE); > + hyp_mem_base = memblock_phys_alloc(ALIGN(hyp_mem_size, PMD_SIZE), > + PMD_SIZE); > if (!hyp_mem_base) > - hyp_mem_base = memblock_find_in_range(0, > memblock_end_of_DRAM(), > - hyp_mem_size, > PAGE_SIZE); > +
Re: [PATCH 1/4] arm64: mm: Fix TLBI vs ASID rollover
On Fri, Aug 06, 2021 at 01:42:42PM +0100, Will Deacon wrote: > On Fri, Aug 06, 2021 at 12:59:28PM +0100, Catalin Marinas wrote: > > On Fri, Aug 06, 2021 at 12:31:04PM +0100, Will Deacon wrote: > > > diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h > > > index 75beffe2ee8a..e9c30859f80c 100644 > > > --- a/arch/arm64/include/asm/mmu.h > > > +++ b/arch/arm64/include/asm/mmu.h > > > @@ -27,11 +27,32 @@ typedef struct { > > > } mm_context_t; > > > > > > /* > > > - * This macro is only used by the TLBI and low-level switch_mm() code, > > > - * neither of which can race with an ASID change. We therefore don't > > > - * need to reload the counter using atomic64_read(). > > > + * We use atomic64_read() here because the ASID for an 'mm_struct' can > > > + * be reallocated when scheduling one of its threads following a > > > + * rollover event (see new_context() and flush_context()). In this case, > > > + * a concurrent TLBI (e.g. via try_to_unmap_one() and ptep_clear_flush()) > > > + * may use a stale ASID. This is fine in principle as the new ASID is > > > + * guaranteed to be clean in the TLB, but the TLBI routines have to take > > > + * care to handle the following race: > > > + * > > > + *CPU 0CPU 1 CPU 2 > > > + * > > > + *// ptep_clear_flush(mm) > > > + *xchg_relaxed(pte, 0) > > > + *DSB ISHST > > > + *old = ASID(mm) > > > > We'd need specs clarified (ARM ARM, cat model) that the DSB ISHST is > > sufficient to order the pte write with the subsequent ASID read. > > Although I agree that the cat model needs updating and also that the Arm > ARM isn't helpful by trying to define DMB and DSB at the same time, it > does clearly state the following: > > // B2-149 > | A DSB instruction executed by a PE, PEe, completes when all of the > | following apply: > | > | * All explicit memory accesses of the required access types appearing > | in program order before the DSB are complete for the set of observers > | in the required shareability domain. > > [...] 
> > // B2-150 > | In addition, no instruction that appears in program order after the > | DSB instruction can alter any state of the system or perform any part > | of its functionality until the DSB completes other than: > | > | * Being fetched from memory and decoded. > | * Reading the general-purpose, SIMD and floating-point, Special-purpose, > | or System registers that are directly or indirectly read without > | causing side-effects. > > Which means that the ASID read cannot return its data before the DSB ISHST > has completed and the DSB ISHST cannot complete until the PTE write has > completed. Thanks for the explanation. > > Otherwise the patch looks fine to me: > > > > Reviewed-by: Catalin Marinas > > Thanks! Do you want to queue it for 5.15? I don't think there's a need to > rush it into 5.14 given that we don't have any evidence of it happening > in practice. Happy to queue it for 5.15. -- Catalin ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Re: [PATCH 1/4] arm64: mm: Fix TLBI vs ASID rollover
On Fri, Aug 06, 2021 at 12:59:28PM +0100, Catalin Marinas wrote: > On Fri, Aug 06, 2021 at 12:31:04PM +0100, Will Deacon wrote: > > diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h > > index 75beffe2ee8a..e9c30859f80c 100644 > > --- a/arch/arm64/include/asm/mmu.h > > +++ b/arch/arm64/include/asm/mmu.h > > @@ -27,11 +27,32 @@ typedef struct { > > } mm_context_t; > > > > /* > > - * This macro is only used by the TLBI and low-level switch_mm() code, > > - * neither of which can race with an ASID change. We therefore don't > > - * need to reload the counter using atomic64_read(). > > + * We use atomic64_read() here because the ASID for an 'mm_struct' can > > + * be reallocated when scheduling one of its threads following a > > + * rollover event (see new_context() and flush_context()). In this case, > > + * a concurrent TLBI (e.g. via try_to_unmap_one() and ptep_clear_flush()) > > + * may use a stale ASID. This is fine in principle as the new ASID is > > + * guaranteed to be clean in the TLB, but the TLBI routines have to take > > + * care to handle the following race: > > + * > > + *CPU 0CPU 1 CPU 2 > > + * > > + *// ptep_clear_flush(mm) > > + *xchg_relaxed(pte, 0) > > + *DSB ISHST > > + *old = ASID(mm) > > We'd need specs clarified (ARM ARM, cat model) that the DSB ISHST is > sufficient to order the pte write with the subsequent ASID read. Although I agree that the cat model needs updating and also that the Arm ARM isn't helpful by trying to define DMB and DSB at the same time, it does clearly state the following: // B2-149 | A DSB instruction executed by a PE, PEe, completes when all of the | following apply: | | * All explicit memory accesses of the required access types appearing | in program order before the DSB are complete for the set of observers | in the required shareability domain. [...] 
// B2-150 | In addition, no instruction that appears in program order after the | DSB instruction can alter any state of the system or perform any part | of its functionality until the DSB completes other than: | | * Being fetched from memory and decoded. | * Reading the general-purpose, SIMD and floating-point, Special-purpose, | or System registers that are directly or indirectly read without | causing side-effects. Which means that the ASID read cannot return its data before the DSB ISHST has completed and the DSB ISHST cannot complete until the PTE write has completed. > Otherwise the patch looks fine to me: > > Reviewed-by: Catalin Marinas Thanks! Do you want to queue it for 5.15? I don't think there's a need to rush it into 5.14 given that we don't have any evidence of it happening in practice. Will ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
RE: [PATCH v3 4/4] KVM: arm64: Clear active_vmids on vCPU schedule out
> -Original Message- > From: Shameerali Kolothum Thodi > Sent: 03 August 2021 16:57 > To: 'Will Deacon' > Cc: linux-arm-ker...@lists.infradead.org; kvmarm@lists.cs.columbia.edu; > linux-ker...@vger.kernel.org; m...@kernel.org; catalin.mari...@arm.com; > james.mo...@arm.com; julien.thierry.k...@gmail.com; > suzuki.poul...@arm.com; jean-phili...@linaro.org; > alexandru.eli...@arm.com; qper...@google.com; Linuxarm > > Subject: RE: [PATCH v3 4/4] KVM: arm64: Clear active_vmids on vCPU > schedule out > > > > > -Original Message- > > From: Will Deacon [mailto:w...@kernel.org] > > Sent: 03 August 2021 16:31 > > To: Shameerali Kolothum Thodi > > Cc: linux-arm-ker...@lists.infradead.org; kvmarm@lists.cs.columbia.edu; > > linux-ker...@vger.kernel.org; m...@kernel.org; catalin.mari...@arm.com; > > james.mo...@arm.com; julien.thierry.k...@gmail.com; > > suzuki.poul...@arm.com; jean-phili...@linaro.org; > > alexandru.eli...@arm.com; qper...@google.com; Linuxarm > > > > Subject: Re: [PATCH v3 4/4] KVM: arm64: Clear active_vmids on vCPU > > schedule out > > > > On Tue, Aug 03, 2021 at 12:55:25PM +, Shameerali Kolothum Thodi > > wrote: > > > > > diff --git a/arch/arm64/kvm/vmid.c b/arch/arm64/kvm/vmid.c > > > > > index 5584e84aed95..5fd51f5445c1 100644 > > > > > --- a/arch/arm64/kvm/vmid.c > > > > > +++ b/arch/arm64/kvm/vmid.c > > > > > @@ -116,6 +116,12 @@ static u64 new_vmid(struct kvm_vmid > > > > *kvm_vmid) > > > > > return idx2vmid(vmid) | generation; > > > > > } > > > > > > > > > > +/* Call with preemption disabled */ > > > > > +void kvm_arm_vmid_clear_active(void) > > > > > +{ > > > > > + atomic64_set(this_cpu_ptr(_vmids), 0); > > > > > +} > > > > > > > > I think this is very broken, as it will force everybody to take the > > > > slow-path when they see an active_vmid of 0. > > > > > > Yes. I have seen that happening in my test setup. > > > > Why didn't you say so?! > > Sorry. 
I thought of getting some performance numbers with and > without this patch and measure the impact. But didn't quite get time > to finish it yet. These are some test numbers with and without this patch, run on two different test setups. a)Test Setup -1 --- Platform: HiSilicon D06 with 128 CPUs, VMID bits = 16 Run 128 VMs concurrently each with 2 vCPUs. Each Guest will execute hackbench 5 times before exiting. Measurements taken avg. of 10 Runs. Image : 5.14-rc3 --- Time(s) 44.43813888 No. of exits145,348,264 Image: 5.14-rc3 + vmid-v3 Time(s)46.59789034 No. of exits 133,587,307 %diff against 5.14-rc3 Time: 4.8% more Exits: 8% less Image: 5.14-rc3 + vmid-v3 + Without active_asid clear --- Time(s) 44.5031782 No. of exits 144,443,188 %diff against 5.14-rc3 Time: 0.15% more Exits: 2.42% less b)Test Setup -2 --- Platform: HiSilicon D06 + Kernel with maxcpus set to 8 and VMID bits set to 4. Run 40 VMs concurrently each with 2 vCPUs. Each Guest will execute hackbench 5 times before exiting. Measurements taken avg. of 10 Runs. Image : 5.14-rc3-vmid4bit Time(s)46.19963266 No. of exits 23,699,546 Image: 5.14-rc3-vmid4bit + vmid-v3 --- Time(s) 45.83307736 No. of exits 23,260,203 %diff against 5.14-rc3-vmid4bit Time: 0.8% less Exits: 1.85% less Image: 5.14-rc3-vmid4bit + vmid-v3 + Without active_asid clear - Time(s) 44.5031782 No. of exits144,443,188 %diff against 5.14-rc3-vmid4bit Time: 1.05% less Exits: 2.06% less As expected, the active_asid clear on schedule out is not helping. But without this patch, the numbers seems to be better than the vanilla kernel when we force the setup(cpus=8, vmd=4bits) to perform rollover. Please let me know your thoughts. Thanks, Shameer > > > > > > > It also doesn't solve the issue I mentioned before, as an active_vmid > > > > of 0 > > > > means that the reserved vmid is preserved. > > > > > > > > Needs more thought... > > > > > > How about we clear all the active_vmids in kvm_arch_free_vm() if it > > > matches the kvm_vmid->id ? 
But we may have to hold the lock > > > there > > > > I think we have to be really careful not to run into the "suspended > > animation" problem described in ae120d9edfe9 ("ARM: 7767/1: let the ASID > > allocator handle suspended animation") if we go down this road. > > > Ok. I will go through that. > > > Maybe something along the lines of: > > > > ROLLOVER > > > > * Take lock > > * Inc generation > > => This will force everybody down the slow path > > * Record active VMIDs > > * Broadcast TLBI > > => Only active VMIDs can be dirty > > => Reserve
Re: [PATCH 1/4] arm64: mm: Fix TLBI vs ASID rollover
On Fri, Aug 06, 2021 at 12:31:04PM +0100, Will Deacon wrote: > diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h > index 75beffe2ee8a..e9c30859f80c 100644 > --- a/arch/arm64/include/asm/mmu.h > +++ b/arch/arm64/include/asm/mmu.h > @@ -27,11 +27,32 @@ typedef struct { > } mm_context_t; > > /* > - * This macro is only used by the TLBI and low-level switch_mm() code, > - * neither of which can race with an ASID change. We therefore don't > - * need to reload the counter using atomic64_read(). > + * We use atomic64_read() here because the ASID for an 'mm_struct' can > + * be reallocated when scheduling one of its threads following a > + * rollover event (see new_context() and flush_context()). In this case, > + * a concurrent TLBI (e.g. via try_to_unmap_one() and ptep_clear_flush()) > + * may use a stale ASID. This is fine in principle as the new ASID is > + * guaranteed to be clean in the TLB, but the TLBI routines have to take > + * care to handle the following race: > + * > + *CPU 0CPU 1 CPU 2 > + * > + *// ptep_clear_flush(mm) > + *xchg_relaxed(pte, 0) > + *DSB ISHST > + *old = ASID(mm) We'd need specs clarified (ARM ARM, cat model) that the DSB ISHST is sufficient to order the pte write with the subsequent ASID read. Otherwise the patch looks fine to me: Reviewed-by: Catalin Marinas ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Re: [PATCH] of: restricted dma: Don't fail device probe on rmem init failure
On Fri, Aug 06, 2021 at 12:31:05PM +0100, Will Deacon wrote: > If CONFIG_DMA_RESTRICTED_POOL=n then probing a device with a reference > to a "restricted-dma-pool" will fail with a reasonably cryptic error: > > | pci-host-generic: probe of 1.pci failed with error -22 > > Print a more helpful message in this case and try to continue probing > the device as we do if the kernel doesn't have the restricted DMA patches > applied or either CONFIG_OF_ADDRESS or CONFIG_HAS_DMA =n. > > Cc: Claire Chang > Cc: Konrad Rzeszutek Wilk > Cc: Robin Murphy > Cc: Christoph Hellwig > Cc: Rob Herring > Signed-off-by: Will Deacon > --- > drivers/of/address.c| 8 > drivers/of/device.c | 2 +- > drivers/of/of_private.h | 8 +++- > 3 files changed, 8 insertions(+), 10 deletions(-) Sorry, didn't mean to send this patch a second time, it was still kicking around in my tree from yesterday and I accidentally picked it up when sending my TLBI series. Please ignore. Will ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
[PATCH 4/4] KVM: arm64: Upgrade VMID accesses to {READ,WRITE}_ONCE
From: Marc Zyngier Since TLB invalidation can run in parallel with VMID allocation, we need to be careful and avoid any sort of load/store tearing. Use {READ,WRITE}_ONCE consistently to avoid any surprise. Cc: Catalin Marinas Cc: Jade Alglave Cc: Shameer Kolothum Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/include/asm/kvm_mmu.h | 7 ++- arch/arm64/kvm/arm.c | 2 +- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 4 ++-- arch/arm64/kvm/mmu.c | 2 +- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 934ef0deff9f..5828dd8fa738 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -252,6 +252,11 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa, #define kvm_phys_to_vttbr(addr)phys_to_ttbr(addr) +/* + * When this is (directly or indirectly) used on the TLB invalidation + * path, we rely on a previously issued DSB so that page table updates + * and VMID reads are correctly ordered. + */ static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu) { struct kvm_vmid *vmid = >vmid; @@ -259,7 +264,7 @@ static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu) u64 cnp = system_supports_cnp() ? 
VTTBR_CNP_BIT : 0; baddr = mmu->pgd_phys; - vmid_field = (u64)vmid->vmid << VTTBR_VMID_SHIFT; + vmid_field = (u64)READ_ONCE(vmid->vmid) << VTTBR_VMID_SHIFT; return kvm_phys_to_vttbr(baddr) | vmid_field | cnp; } diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e9a2b8f27792..658f76067f46 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -571,7 +571,7 @@ static void update_vmid(struct kvm_vmid *vmid) kvm_call_hyp(__kvm_flush_vm_context); } - vmid->vmid = kvm_next_vmid; + WRITE_ONCE(vmid->vmid, kvm_next_vmid); kvm_next_vmid++; kvm_next_vmid &= (1 << kvm_get_vmid_bits()) - 1; diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index d4e74ca7f876..55ae97a144b8 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -109,8 +109,8 @@ int kvm_host_prepare_stage2(void *pgt_pool_base) mmu->pgd_phys = __hyp_pa(host_kvm.pgt.pgd); mmu->arch = _kvm.arch; mmu->pgt = _kvm.pgt; - mmu->vmid.vmid_gen = 0; - mmu->vmid.vmid = 0; + WRITE_ONCE(mmu->vmid.vmid_gen, 0); + WRITE_ONCE(mmu->vmid.vmid, 0); return 0; } diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 3155c9e778f0..b1a6eaec28ff 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -485,7 +485,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu) mmu->arch = >arch; mmu->pgt = pgt; mmu->pgd_phys = __pa(pgt->pgd); - mmu->vmid.vmid_gen = 0; + WRITE_ONCE(mmu->vmid.vmid_gen, 0); return 0; out_destroy_pgtable: -- 2.32.0.605.g8dce9f2422-goog ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
[PATCH 3/4] KVM: arm64: Convert the host S2 over to __load_guest_stage2()
From: Marc Zyngier The protected mode relies on a separate helper to load the S2 context. Move over to the __load_guest_stage2() helper instead. Cc: Catalin Marinas Cc: Jade Alglave Cc: Shameer Kolothum Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/include/asm/kvm_mmu.h | 11 +++ arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 2 +- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 2 +- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 05e089653a1a..934ef0deff9f 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -267,9 +267,10 @@ static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu) * Must be called from hyp code running at EL2 with an updated VTTBR * and interrupts disabled. */ -static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu, unsigned long vtcr) +static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu, + struct kvm_arch *arch) { - write_sysreg(vtcr, vtcr_el2); + write_sysreg(arch->vtcr, vtcr_el2); write_sysreg(kvm_get_vttbr(mmu), vttbr_el2); /* @@ -280,12 +281,6 @@ static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu, unsigned long asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); } -static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu, - struct kvm_arch *arch) -{ - __load_stage2(mmu, arch->vtcr); -} - static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu) { return container_of(mmu->arch, struct kvm, arch); diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 9c227d87c36d..a910648bc71b 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -29,7 +29,7 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt); static __always_inline void __load_host_stage2(void) { if 
(static_branch_likely(&kvm_protected_mode_initialized)) - __load_stage2(&host_kvm.arch.mmu, host_kvm.arch.vtcr); + __load_guest_stage2(&host_kvm.arch.mmu, &host_kvm.arch); else write_sysreg(0, vttbr_el2); } diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index d938ce95d3bd..d4e74ca7f876 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -126,7 +126,7 @@ int __pkvm_prot_finalize(void) kvm_flush_dcache_to_poc(params, sizeof(*params)); write_sysreg(params->hcr_el2, hcr_el2); - __load_stage2(&host_kvm.arch.mmu, host_kvm.arch.vtcr); + __load_guest_stage2(&host_kvm.arch.mmu, &host_kvm.arch); /* * Make sure to have an ISB before the TLB maintenance below but only -- 2.32.0.605.g8dce9f2422-goog ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
[PATCH 2/4] KVM: arm64: Move kern_hyp_va() usage in __load_guest_stage2() into the callers
From: Marc Zyngier It is a bit awkward to use kern_hyp_va() in __load_guest_stage2(), specially as the helper is shared between VHE and nVHE. Instead, move the use of kern_hyp_va() in the nVHE code, and pass a pointer to the kvm->arch structure instead. Although this may look a bit awkward, it allows for some further simplification. Cc: Catalin Marinas Cc: Jade Alglave Cc: Shameer Kolothum Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/include/asm/kvm_mmu.h | 5 +++-- arch/arm64/kvm/hyp/nvhe/switch.c | 4 +++- arch/arm64/kvm/hyp/nvhe/tlb.c| 2 +- arch/arm64/kvm/hyp/vhe/switch.c | 2 +- arch/arm64/kvm/hyp/vhe/tlb.c | 2 +- 5 files changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index b52c5c4b9a3d..05e089653a1a 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -280,9 +280,10 @@ static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu, unsigned long asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); } -static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu) +static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu, + struct kvm_arch *arch) { - __load_stage2(mmu, kern_hyp_va(mmu->arch)->vtcr); + __load_stage2(mmu, arch->vtcr); } static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu) diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index f7af9688c1f7..e50a49082923 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -170,6 +170,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *host_ctxt; struct kvm_cpu_context *guest_ctxt; + struct kvm_s2_mmu *mmu; bool pmu_switch_needed; u64 exit_code; @@ -213,7 +214,8 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) __sysreg32_restore_state(vcpu); __sysreg_restore_state_nvhe(guest_ctxt); - __load_guest_stage2(kern_hyp_va(vcpu->arch.hw_mmu)); + mmu = 
kern_hyp_va(vcpu->arch.hw_mmu); + __load_guest_stage2(mmu, kern_hyp_va(mmu->arch)); __activate_traps(vcpu); __hyp_vgic_restore_state(vcpu); diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index 38ed0f6f2703..76229407d8f0 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -39,7 +39,7 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, * ensuring that we always have an ISB, but not two ISBs back * to back. */ - __load_guest_stage2(mmu); + __load_guest_stage2(mmu, kern_hyp_va(mmu->arch)); asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT)); } diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index b3229924d243..0cb7523a501a 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -128,7 +128,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) * __load_guest_stage2 configures stage 2 translation, and * __activate_traps clear HCR_EL2.TGE (among other things). */ - __load_guest_stage2(vcpu->arch.hw_mmu); + __load_guest_stage2(vcpu->arch.hw_mmu, vcpu->arch.hw_mmu->arch); __activate_traps(vcpu); __kvm_adjust_pc(vcpu); diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c index 66f17349f0c3..5e9fb3989e0b 100644 --- a/arch/arm64/kvm/hyp/vhe/tlb.c +++ b/arch/arm64/kvm/hyp/vhe/tlb.c @@ -53,7 +53,7 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, * place before clearing TGE. __load_guest_stage2() already * has an ISB in order to deal with this. */ - __load_guest_stage2(mmu); + __load_guest_stage2(mmu, mmu->arch); val = read_sysreg(hcr_el2); val &= ~HCR_TGE; write_sysreg(val, hcr_el2); -- 2.32.0.605.g8dce9f2422-goog ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
[PATCH] of: restricted dma: Don't fail device probe on rmem init failure
If CONFIG_DMA_RESTRICTED_POOL=n then probing a device with a reference to a "restricted-dma-pool" will fail with a reasonably cryptic error: | pci-host-generic: probe of 1.pci failed with error -22 Print a more helpful message in this case and try to continue probing the device as we do if the kernel doesn't have the restricted DMA patches applied or either CONFIG_OF_ADDRESS or CONFIG_HAS_DMA =n. Cc: Claire Chang Cc: Konrad Rzeszutek Wilk Cc: Robin Murphy Cc: Christoph Hellwig Cc: Rob Herring Signed-off-by: Will Deacon --- drivers/of/address.c| 8 drivers/of/device.c | 2 +- drivers/of/of_private.h | 8 +++- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/of/address.c b/drivers/of/address.c index 973257434398..f6bf4b423c2a 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -997,7 +997,7 @@ int of_dma_get_range(struct device_node *np, const struct bus_dma_region **map) return ret; } -int of_dma_set_restricted_buffer(struct device *dev, struct device_node *np) +void of_dma_set_restricted_buffer(struct device *dev, struct device_node *np) { struct device_node *node, *of_node = dev->of_node; int count, i; @@ -1022,11 +1022,11 @@ int of_dma_set_restricted_buffer(struct device *dev, struct device_node *np) */ if (of_device_is_compatible(node, "restricted-dma-pool") && of_device_is_available(node)) - return of_reserved_mem_device_init_by_idx(dev, of_node, - i); + break; } - return 0; + if (i != count && of_reserved_mem_device_init_by_idx(dev, of_node, i)) + dev_warn(dev, "failed to initialise \"restricted-dma-pool\" memory node\n"); } #endif /* CONFIG_HAS_DMA */ diff --git a/drivers/of/device.c b/drivers/of/device.c index 2defdca418ec..258a2b099410 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -166,7 +166,7 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, arch_setup_dma_ops(dev, dma_start, size, iommu, coherent); if (!iommu) - return of_dma_set_restricted_buffer(dev, np); + 
of_dma_set_restricted_buffer(dev, np); return 0; } diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h index f557bd22b0cf..bc883f69496b 100644 --- a/drivers/of/of_private.h +++ b/drivers/of/of_private.h @@ -163,18 +163,16 @@ struct bus_dma_region; #if defined(CONFIG_OF_ADDRESS) && defined(CONFIG_HAS_DMA) int of_dma_get_range(struct device_node *np, const struct bus_dma_region **map); -int of_dma_set_restricted_buffer(struct device *dev, struct device_node *np); +void of_dma_set_restricted_buffer(struct device *dev, struct device_node *np); #else static inline int of_dma_get_range(struct device_node *np, const struct bus_dma_region **map) { return -ENODEV; } -static inline int of_dma_set_restricted_buffer(struct device *dev, - struct device_node *np) +static inline void of_dma_set_restricted_buffer(struct device *dev, + struct device_node *np) { - /* Do nothing, successfully. */ - return 0; } #endif -- 2.32.0.605.g8dce9f2422-goog ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
[PATCH 1/4] arm64: mm: Fix TLBI vs ASID rollover
When switching to an 'mm_struct' for the first time following an ASID rollover, a new ASID may be allocated and assigned to 'mm->context.id'. This reassignment can happen concurrently with other operations on the mm, such as unmapping pages and subsequently issuing TLB invalidation. Consequently, we need to ensure that (a) accesses to 'mm->context.id' are atomic and (b) all page-table updates made prior to a TLBI using the old ASID are guaranteed to be visible to CPUs running with the new ASID. This was found by inspection after reviewing the VMID changes from Shameer but it looks like a real (yet hard to hit) bug. Cc: Cc: Catalin Marinas Cc: Marc Zyngier Cc: Jade Alglave Cc: Shameer Kolothum Signed-off-by: Will Deacon --- arch/arm64/include/asm/mmu.h | 29 + arch/arm64/include/asm/tlbflush.h | 11 ++- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 75beffe2ee8a..e9c30859f80c 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -27,11 +27,32 @@ typedef struct { } mm_context_t; /* - * This macro is only used by the TLBI and low-level switch_mm() code, - * neither of which can race with an ASID change. We therefore don't - * need to reload the counter using atomic64_read(). + * We use atomic64_read() here because the ASID for an 'mm_struct' can + * be reallocated when scheduling one of its threads following a + * rollover event (see new_context() and flush_context()). In this case, + * a concurrent TLBI (e.g. via try_to_unmap_one() and ptep_clear_flush()) + * may use a stale ASID. 
This is fine in principle as the new ASID is + * guaranteed to be clean in the TLB, but the TLBI routines have to take + * care to handle the following race: + * + *CPU 0CPU 1 CPU 2 + * + *// ptep_clear_flush(mm) + *xchg_relaxed(pte, 0) + *DSB ISHST + *old = ASID(mm) + * | + * | new = new_context(mm) + * \-> atomic_set(mm->context.id, new) + * cpu_switch_mm(mm) + * // Hardware walk of pte using new ASID + *TLBI(old) + * + * In this scenario, the barrier on CPU 0 and the dependency on CPU 1 + * ensure that the page-table walker on CPU 1 *must* see the invalid PTE + * written by CPU 0. */ -#define ASID(mm) ((mm)->context.id.counter & 0xffff) +#define ASID(mm) (atomic64_read(&(mm)->context.id) & 0xffff) static inline bool arm64_kernel_unmapped_at_el0(void) { diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index cc3f5a33ff9c..36f02892e1df 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -245,9 +245,10 @@ static inline void flush_tlb_all(void) static inline void flush_tlb_mm(struct mm_struct *mm) { - unsigned long asid = __TLBI_VADDR(0, ASID(mm)); + unsigned long asid; dsb(ishst); + asid = __TLBI_VADDR(0, ASID(mm)); __tlbi(aside1is, asid); __tlbi_user(aside1is, asid); dsb(ish); @@ -256,9 +257,10 @@ static inline void flush_tlb_mm(struct mm_struct *mm) static inline void flush_tlb_page_nosync(struct vm_area_struct *vma, unsigned long uaddr) { - unsigned long addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm)); + unsigned long addr; dsb(ishst); + addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm)); __tlbi(vale1is, addr); __tlbi_user(vale1is, addr); } @@ -283,9 +285,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, { int num = 0; int scale = 0; - unsigned long asid = ASID(vma->vm_mm); - unsigned long addr; - unsigned long pages; + unsigned long asid, addr, pages; start = round_down(start, stride); end = round_up(end, stride); @@ -305,6 +305,7 @@ static inline void __flush_tlb_range(struct 
vm_area_struct *vma, } dsb(ishst); + asid = ASID(vma->vm_mm); /* * When the CPU does not support TLB range operations, flush the TLB -- 2.32.0.605.g8dce9f2422-goog ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
[PATCH 0/4] Fix racing TLBI with ASID/VMID reallocation
Hi all, While reviewing Shameer's reworked VMID allocator [1] and discussing with Marc, we spotted a race between TLB invalidation (which typically takes an ASID or VMID argument) and reallocation of ASID/VMID for the context being targetted. The first patch spells out an example with try_to_unmap_one() in a comment, which Catalin has kindly modelled in TLA+ at [2]. Although I'm posting all this together for ease of review, the intention is that the first patch will go via arm64 with the latter going via kvm. Cheers, Will [1] https://lore.kernel.org/r/20210729104009.382-1-shameerali.kolothum.th...@huawei.com [2] https://git.kernel.org/pub/scm/linux/kernel/git/cmarinas/kernel-tla.git/commit/ Cc: Catalin Marinas Cc: Marc Zyngier Cc: Jade Alglave Cc: Shameer Kolothum Cc: Cc: --->8 Marc Zyngier (3): KVM: arm64: Move kern_hyp_va() usage in __load_guest_stage2() into the callers KVM: arm64: Convert the host S2 over to __load_guest_stage2() KVM: arm64: Upgrade VMID accesses to {READ,WRITE}_ONCE Will Deacon (1): arm64: mm: Fix TLBI vs ASID rollover arch/arm64/include/asm/kvm_mmu.h | 17 ++- arch/arm64/include/asm/mmu.h | 29 --- arch/arm64/include/asm/tlbflush.h | 11 +++ arch/arm64/kvm/arm.c | 2 +- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 2 +- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 6 ++-- arch/arm64/kvm/hyp/nvhe/switch.c | 4 ++- arch/arm64/kvm/hyp/nvhe/tlb.c | 2 +- arch/arm64/kvm/hyp/vhe/switch.c | 2 +- arch/arm64/kvm/hyp/vhe/tlb.c | 2 +- arch/arm64/kvm/mmu.c | 2 +- 11 files changed, 52 insertions(+), 27 deletions(-) -- 2.32.0.605.g8dce9f2422-goog ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
[RFC PATCH 14/15] mm: introduce MIN_MAX_ORDER to replace MAX_ORDER as compile time constant.
From: Zi Yan For other MAX_ORDER uses (described below), there is no need or too much hassle to convert certain static array to dynamic ones. Add MIN_MAX_ORDER to serve as compile time constant in place of MAX_ORDER. ARM64 hypervisor maintains its own free page list and does not import any core kernel symbols, so soon-to-be runtime variable MAX_ORDER is not accessible in ARM64 hypervisor code. Also there is no need to allocating very large pages. In SLAB/SLOB/SLUB, 2-D array kmalloc_caches uses MAX_ORDER in its second dimension. It is too much hassle to allocate memory for kmalloc_caches before any proper memory allocator is set up. Signed-off-by: Zi Yan Cc: Marc Zyngier Cc: Catalin Marinas Cc: Christoph Lameter Cc: Vlastimil Babka Cc: Quentin Perret Cc: linux-arm-ker...@lists.infradead.org Cc: kvmarm@lists.cs.columbia.edu Cc: linux...@kvack.org Cc: linux-ker...@vger.kernel.org --- arch/arm64/kvm/hyp/include/nvhe/gfp.h | 2 +- arch/arm64/kvm/hyp/nvhe/page_alloc.c | 3 ++- include/linux/mmzone.h| 3 +++ include/linux/slab.h | 8 mm/slab.c | 2 +- mm/slub.c | 7 --- 6 files changed, 15 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h b/arch/arm64/kvm/hyp/include/nvhe/gfp.h index fb0f523d1492..c774b4a98336 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/gfp.h +++ b/arch/arm64/kvm/hyp/include/nvhe/gfp.h @@ -16,7 +16,7 @@ struct hyp_pool { * API at EL2. 
*/ hyp_spinlock_t lock; - struct list_head free_area[MAX_ORDER]; + struct list_head free_area[MIN_MAX_ORDER]; phys_addr_t range_start; phys_addr_t range_end; unsigned short max_order; diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c index 41fc25bdfb34..a1cc1b648de0 100644 --- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c +++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c @@ -226,7 +226,8 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages, int i; hyp_spin_lock_init(>lock); - pool->max_order = min(MAX_ORDER, get_order(nr_pages << PAGE_SHIFT)); + + pool->max_order = min(MIN_MAX_ORDER, get_order(nr_pages << PAGE_SHIFT)); for (i = 0; i < pool->max_order; i++) INIT_LIST_HEAD(>free_area[i]); pool->range_start = phys; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 09aafc05aef4..379dada82d4b 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -27,11 +27,14 @@ #ifndef CONFIG_ARCH_FORCE_MAX_ORDER #ifdef CONFIG_SET_MAX_ORDER #define MAX_ORDER CONFIG_SET_MAX_ORDER +#define MIN_MAX_ORDER CONFIG_SET_MAX_ORDER #else #define MAX_ORDER 11 +#define MIN_MAX_ORDER MAX_ORDER #endif /* CONFIG_SET_MAX_ORDER */ #else #define MAX_ORDER CONFIG_ARCH_FORCE_MAX_ORDER +#define MIN_MAX_ORDER CONFIG_ARCH_FORCE_MAX_ORDER #endif /* CONFIG_ARCH_FORCE_MAX_ORDER */ #define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1)) diff --git a/include/linux/slab.h b/include/linux/slab.h index 2c0d80cca6b8..d8747c158db6 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -244,8 +244,8 @@ static inline void __check_heap_object(const void *ptr, unsigned long n, * to do various tricks to work around compiler limitations in order to * ensure proper constant folding. */ -#define KMALLOC_SHIFT_HIGH ((MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? \ - (MAX_ORDER + PAGE_SHIFT - 1) : 25) +#define KMALLOC_SHIFT_HIGH ((MIN_MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? 
\ + (MIN_MAX_ORDER + PAGE_SHIFT - 1) : 25) #define KMALLOC_SHIFT_MAX KMALLOC_SHIFT_HIGH #ifndef KMALLOC_SHIFT_LOW #define KMALLOC_SHIFT_LOW 5 @@ -258,7 +258,7 @@ static inline void __check_heap_object(const void *ptr, unsigned long n, * (PAGE_SIZE*2). Larger requests are passed to the page allocator. */ #define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1) -#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT - 1) +#define KMALLOC_SHIFT_MAX (MIN_MAX_ORDER + PAGE_SHIFT - 1) #ifndef KMALLOC_SHIFT_LOW #define KMALLOC_SHIFT_LOW 3 #endif @@ -271,7 +271,7 @@ static inline void __check_heap_object(const void *ptr, unsigned long n, * be allocated from the same page. */ #define KMALLOC_SHIFT_HIGH PAGE_SHIFT -#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT - 1) +#define KMALLOC_SHIFT_MAX (MIN_MAX_ORDER + PAGE_SHIFT - 1) #ifndef KMALLOC_SHIFT_LOW #define KMALLOC_SHIFT_LOW 3 #endif diff --git a/mm/slab.c b/mm/slab.c index d0f725637663..0041de8ec0e9 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -466,7 +466,7 @@ static int __init slab_max_order_setup(char *str) { get_option(&str, &slab_max_order); slab_max_order = slab_max_order < 0 ? 0 : - min(slab_max_order, MAX_ORDER - 1); + min(slab_max_order, MIN_MAX_ORDER - 1);