On Mon, Feb 23, 2026 at 05:48:01PM +0000, Yeoreum Yun wrote: > Use the CASLT instruction to swap the guest descriptor when FEAT_LSUI > is enabled, avoiding the need to clear the PAN bit. > > Signed-off-by: Yeoreum Yun <[email protected]> > --- > arch/arm64/include/asm/futex.h | 17 +---------------- > arch/arm64/include/asm/lsui.h | 27 +++++++++++++++++++++++++++ > arch/arm64/kvm/at.c | 32 +++++++++++++++++++++++++++++++- > 3 files changed, 59 insertions(+), 17 deletions(-) > create mode 100644 arch/arm64/include/asm/lsui.h > > diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h > index b579e9d0964d..6779c4ad927f 100644 > --- a/arch/arm64/include/asm/futex.h > +++ b/arch/arm64/include/asm/futex.h > @@ -7,11 +7,9 @@ > > #include <linux/futex.h> > #include <linux/uaccess.h> > -#include <linux/stringify.h> > > -#include <asm/alternative.h> > -#include <asm/alternative-macros.h> > #include <asm/errno.h> > +#include <asm/lsui.h> > > #define FUTEX_MAX_LOOPS 128 /* What's the largest number you can think > of? */ > > @@ -91,8 +89,6 @@ __llsc_futex_cmpxchg(u32 __user *uaddr, u32 oldval, u32 > newval, u32 *oval) > > #ifdef CONFIG_ARM64_LSUI > > -#define __LSUI_PREAMBLE ".arch_extension lsui\n" > - > #define LSUI_FUTEX_ATOMIC_OP(op, asm_op) \ > static __always_inline int \ > __lsui_futex_atomic_##op(int oparg, u32 __user *uaddr, int *oval) \ > @@ -235,17 +231,6 @@ __lsui_futex_cmpxchg(u32 __user *uaddr, u32 oldval, u32 > newval, u32 *oval) > { > return __lsui_cmpxchg32(uaddr, oldval, newval, oval); > } > - > -#define __lsui_llsc_body(op, ...) \ > -({ \ > - alternative_has_cap_unlikely(ARM64_HAS_LSUI) ? \ > - __lsui_##op(__VA_ARGS__) : __llsc_##op(__VA_ARGS__); \ > -}) > - > -#else /* CONFIG_ARM64_LSUI */ > - > -#define __lsui_llsc_body(op, ...) 
__llsc_##op(__VA_ARGS__) > - > #endif /* CONFIG_ARM64_LSUI */ > > > diff --git a/arch/arm64/include/asm/lsui.h b/arch/arm64/include/asm/lsui.h > new file mode 100644 > index 000000000000..4f956188835e > --- /dev/null > +++ b/arch/arm64/include/asm/lsui.h > @@ -0,0 +1,27 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +#ifndef __ASM_LSUI_H > +#define __ASM_LSUI_H > + > +#include <linux/compiler_types.h> > +#include <linux/stringify.h> > +#include <asm/alternative.h> > +#include <asm/alternative-macros.h> > +#include <asm/cpucaps.h> > + > +#ifdef CONFIG_ARM64_LSUI > + > +#define __LSUI_PREAMBLE ".arch_extension lsui\n" > + > +#define __lsui_llsc_body(op, ...) \ > +({ \ > + alternative_has_cap_unlikely(ARM64_HAS_LSUI) ? \ > + __lsui_##op(__VA_ARGS__) : __llsc_##op(__VA_ARGS__); \ > +}) > + > +#else /* CONFIG_ARM64_LSUI */ > + > +#define __lsui_llsc_body(op, ...) __llsc_##op(__VA_ARGS__) > + > +#endif /* CONFIG_ARM64_LSUI */ > + > +#endif /* __ASM_LSUI_H */ > diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c > index 885bd5bb2f41..1aceeef04567 100644 > --- a/arch/arm64/kvm/at.c > +++ b/arch/arm64/kvm/at.c > @@ -9,6 +9,7 @@ > #include <asm/esr.h> > #include <asm/kvm_hyp.h> > #include <asm/kvm_mmu.h> > +#include <asm/lsui.h> > > static void fail_s1_walk(struct s1_walk_result *wr, u8 fst, bool s1ptw) > { > @@ -1704,6 +1705,33 @@ int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, > u64 va, u64 ipa, int *level) > } > } > > +#ifdef CONFIG_ARM64_LSUI > +static int __lsui_swap_desc(u64 __user *ptep, u64 old, u64 new) > +{ > + u64 tmp = old; > + int ret = 0; > + > + uaccess_ttbr0_enable(); > + > + asm volatile(__LSUI_PREAMBLE > + "1: caslt %[old], %[new], %[addr]\n" > + "2:\n" > + _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w[ret]) > + : [old] "+r" (old), [addr] "+Q" (*ptep), [ret] "+r" (ret) > + : [new] "r" (new) > + : "memory"); > + > + uaccess_ttbr0_disable(); > + > + if (ret) > + return ret; > + if (tmp != old) > + return -EAGAIN; > + > + return ret; > +} > +#endif > + > static 
int __lse_swap_desc(u64 __user *ptep, u64 old, u64 new) > { > u64 tmp = old; > @@ -1779,7 +1807,9 @@ int __kvm_at_swap_desc(struct kvm *kvm, gpa_t ipa, u64 > old, u64 new) > return -EPERM; > > ptep = (u64 __user *)hva + offset; > - if (cpus_have_final_cap(ARM64_HAS_LSE_ATOMICS)) > + if (IS_ENABLED(CONFIG_ARM64_LSUI) && > cpus_have_final_cap(ARM64_HAS_LSUI))
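The Kconfig check belongs in cpucap_is_possible() rather than being open-coded with IS_ENABLED(CONFIG_ARM64_LSUI) at the call site; with that in place, cpus_have_final_cap(ARM64_HAS_LSUI) is sufficient on its own here. Thanks, Oliver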

