From: Haseeb Ashraf <[email protected]> This feature is available since armv8 and can be used to perform IPA-based TLBI for arm32. XENMEM_remove_from_physmap performs this invalidation in each hypercall so this code path will be optimized, instead of performing a TLBIALL each time in presence of nTLBPA.
Suggested-by: Julien Grall <[email protected]> Signed-off-by: Haseeb Ashraf <[email protected]> --- xen/arch/arm/cpufeature.c | 12 ++++ xen/arch/arm/include/asm/arm32/flushtlb.h | 76 +++++++++++++++++++++++ xen/arch/arm/include/asm/cpregs.h | 4 ++ xen/arch/arm/include/asm/cpufeature.h | 7 ++- xen/arch/arm/include/asm/processor.h | 8 +++ 5 files changed, 105 insertions(+), 2 deletions(-) diff --git a/xen/arch/arm/cpufeature.c b/xen/arch/arm/cpufeature.c index 9fa1c45869..d18c6449c6 100644 --- a/xen/arch/arm/cpufeature.c +++ b/xen/arch/arm/cpufeature.c @@ -18,6 +18,11 @@ DECLARE_BITMAP(cpu_hwcaps, ARM_NCAPS); struct cpuinfo_arm __read_mostly domain_cpuinfo; #ifdef CONFIG_ARM_32 +static bool has_tlb_ipa_instruction(const struct arm_cpu_capabilities *entry) +{ + return system_cpuinfo.mm32.unitlb == MM32_UNITLB_BY_IPA; +} + static bool has_ntlbpa(const struct arm_cpu_capabilities *entry) { return system_cpuinfo.mm32.ntlbpa == MM32_NTLBPA_SUPPORT_IMP; @@ -37,6 +42,13 @@ static bool has_sb_instruction(const struct arm_cpu_capabilities *entry) #endif static const struct arm_cpu_capabilities arm_features[] = { +#ifdef CONFIG_ARM_32 + { + .desc = "IPA-based TLB Invalidation", + .capability = ARM32_HAS_TLB_IPA, + .matches = has_tlb_ipa_instruction, + }, +#endif #if defined(CONFIG_ARM_32) || defined(CONFIG_ARM_64) { .desc = "Intermediate caching of translation table walks (nTLBPA)", diff --git a/xen/arch/arm/include/asm/arm32/flushtlb.h b/xen/arch/arm/include/asm/arm32/flushtlb.h index db15f29216..0fd45aaaed 100644 --- a/xen/arch/arm/include/asm/arm32/flushtlb.h +++ b/xen/arch/arm/include/asm/arm32/flushtlb.h @@ -1,6 +1,8 @@ #ifndef __ASM_ARM_ARM32_FLUSHTLB_H__ #define __ASM_ARM_ARM32_FLUSHTLB_H__ +#include <xen/sizes.h> /* For SZ_* macros. */ + /* * Every invalidation operation use the following patterns: * @@ -99,12 +101,56 @@ static inline void __flush_xen_tlb_one(vaddr_t va) asm volatile(STORE_CP32(0, TLBIMVAHIS) : : "r" (va) : "memory"); } +/* Flush TLB of local processor for address ipa. */ +static inline void __flush_guest_tlb_one_s2_local(paddr_t ipa) +{ + asm volatile(STORE_CP32(0, TLBIIPAS2) + : : "r" (ipa >> PAGE_SHIFT) : "memory"); +} + +/* Flush TLB of all processors in the inner-shareable domain for address ipa. */ +static inline void __flush_guest_tlb_one_s2(paddr_t ipa) +{ + asm volatile(STORE_CP32(0, TLBIIPAS2IS) + : : "r" (ipa >> PAGE_SHIFT) : "memory"); +} + /* * Flush a range of IPA's mappings from the TLB of the local processor. */ static inline void flush_guest_tlb_range_ipa_local(paddr_t ipa, unsigned long size) { + if ( cpus_have_const_cap(ARM_HAS_NTLBPA) && + cpus_have_const_cap(ARM32_HAS_TLB_IPA) ) + { + paddr_t end; + + /* + * If IPA range is too big (empirically found to be 256M), then fallback + * to full TLB flush + */ + if ( size > SZ_256M ) + goto default_tlbi; + + end = ipa + size; + + /* + * See ARM ARM DDI 0487A.e D4.7.2 (Invalidation of TLB entries from + * stage 2 translations) for details. + */ + dsb(nshst); /* Ensure prior page-tables updates have completed */ + while ( ipa < end ) + { + __flush_guest_tlb_one_s2_local(ipa); + ipa += PAGE_SIZE; + } + dsb(nsh); + isb(); + return; + } + +default_tlbi: /* * Following can invalidate both stage-1 and stage-2 TLBs depending upon * the execution mode. @@ -120,6 +166,36 @@ static inline void flush_guest_tlb_range_ipa_local(paddr_t ipa, static inline void flush_guest_tlb_range_ipa(paddr_t ipa, unsigned long size) { + if ( cpus_have_const_cap(ARM_HAS_NTLBPA) && + cpus_have_const_cap(ARM32_HAS_TLB_IPA) ) + { + paddr_t end; + + /* + * If IPA range is too big (empirically found to be 256M), then fallback + * to full TLB flush + */ + if ( size > SZ_256M ) + goto default_tlbi; + + end = ipa + size; + + /* + * See ARM ARM DDI 0487A.e D4.7.2 (Invalidation of TLB entries from + * stage 2 translations) for details. + */ + dsb(ishst); /* Ensure prior page-tables updates have completed */ + while ( ipa < end ) + { + __flush_guest_tlb_one_s2(ipa); + ipa += PAGE_SIZE; + } + dsb(ish); + isb(); + return; + } + +default_tlbi: /* * Following can invalidate both stage-1 and stage-2 TLBs depending upon * the execution mode. diff --git a/xen/arch/arm/include/asm/cpregs.h b/xen/arch/arm/include/asm/cpregs.h index a7503a190f..46415b05d3 100644 --- a/xen/arch/arm/include/asm/cpregs.h +++ b/xen/arch/arm/include/asm/cpregs.h @@ -229,6 +229,10 @@ #define TLBIALLH p15,4,c8,c7,0 /* Invalidate Entire Hyp. Unified TLB */ #define TLBIMVAH p15,4,c8,c7,1 /* Invalidate Unified Hyp. TLB by MVA */ #define TLBIALLNSNH p15,4,c8,c7,4 /* Invalidate Entire Non-Secure Non-Hyp. Unified TLB */ +#define TLBIIPAS2IS p15,4,c8,c0,1 /* Invalidate unified TLB entry for stage 2 by IPA inner shareable */ +#define TLBIIPAS2LIS p15,4,c8,c0,5 /* Invalidate unified TLB entry for stage 2 last level by IPA inner shareable */ +#define TLBIIPAS2 p15,4,c8,c4,1 /* Invalidate unified TLB entry for stage 2 by IPA */ +#define TLBIIPAS2L p15,4,c8,c4,5 /* Invalidate unified TLB entry for stage 2 last level by IPA */ /* CP15 CR9: Performance monitors */ #define PMCR p15,0,c9,c12,0 /* Perf. Mon. Control Register */ diff --git a/xen/arch/arm/include/asm/cpufeature.h b/xen/arch/arm/include/asm/cpufeature.h index af4e9bdf06..8f6b889850 100644 --- a/xen/arch/arm/include/asm/cpufeature.h +++ b/xen/arch/arm/include/asm/cpufeature.h @@ -77,8 +77,9 @@ #define ARM_HAS_SB 16 #define ARM64_WORKAROUND_1508412 17 #define ARM_HAS_NTLBPA 18 +#define ARM32_HAS_TLB_IPA 19 -#define ARM_NCAPS 19 +#define ARM_NCAPS 20 #ifndef __ASSEMBLY__ @@ -440,7 +441,9 @@ struct cpuinfo_arm { /* MMFR1 */ unsigned long __res1:32; /* MMFR2 */ - unsigned long __res2:32; + unsigned long __res2_0:16; + unsigned long unitlb:4; + unsigned long __res2_1:12; /* MMFR3 */ unsigned long __res3:32; /* MMFR4 */ diff --git a/xen/arch/arm/include/asm/processor.h b/xen/arch/arm/include/asm/processor.h index 01f3cb16a0..5a414e80df 100644 --- a/xen/arch/arm/include/asm/processor.h +++ b/xen/arch/arm/include/asm/processor.h @@ -459,6 +459,14 @@ #define FSRL_STATUS_DEBUG (_AC(0x22,UL)<<0) #ifdef CONFIG_ARM_32 +#define MM32_UNITLB_NI 0x0 +#define MM32_UNITLB_BY_VA 0x1 +#define MM32_UNITLB_BY_MATCH_ASID 0x2 +#define MM32_UNITLB_BY_ALL_ASID 0x3 +#define MM32_UNITLB_OTHER_TLBS 0x4 +#define MM32_UNITLB_BROADCAST 0x5 +#define MM32_UNITLB_BY_IPA 0x6 + #define MM32_NTLBPA_SUPPORT_NI 0x0 #define MM32_NTLBPA_SUPPORT_IMP 0x1 #endif -- 2.43.0
