https://github.com/kmclaughlin-arm updated https://github.com/llvm/llvm-project/pull/170490
>From b2a59f99a05563c819b720ea56d806beb3f13d79 Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin <[email protected]> Date: Tue, 25 Nov 2025 14:21:21 +0000 Subject: [PATCH 1/3] [AArch64] Add support for range prefetch intrinsic This patch adds support in Clang for the RPRFM instruction, which is available when FEAT_RPRFM is defined: void __rpld(int64_t access_kind, uint64_t retention_policy, uint64_t reuse_distance, int64_t stride, uint64_t count, int64_t length, void const *addr); If FEAT_RPRFM is not available, this instruction is a NOP. This implements the following ACLE proposal: https://github.com/ARM-software/acle/pull/423 --- clang/include/clang/Basic/BuiltinsAArch64.def | 3 ++ clang/lib/Basic/Targets/AArch64.cpp | 7 +++++ clang/lib/Basic/Targets/AArch64.h | 1 + clang/lib/Headers/arm_acle.h | 4 +++ clang/lib/Sema/SemaARM.cpp | 9 ++++++ clang/test/CodeGen/arm_acle.c | 13 +++++++++ clang/test/CodeGen/builtins-arm64.c | 14 +++++++++ .../print-supported-extensions-aarch64.c | 1 + .../Preprocessor/aarch64-target-features.c | 3 ++ clang/test/Sema/builtins-arm64.c | 13 ++++++++- llvm/include/llvm/IR/IntrinsicsAArch64.td | 8 +++++ llvm/lib/IR/Verifier.cpp | 21 ++++++++++++++ llvm/lib/Target/AArch64/AArch64Features.td | 3 ++ .../Target/AArch64/AArch64ISelLowering.cpp | 23 +++++++++++++++ llvm/lib/Target/AArch64/AArch64InstrGISel.td | 7 +++++ llvm/lib/Target/AArch64/AArch64InstrInfo.td | 10 +++++++ .../AArch64/AsmParser/AArch64AsmParser.cpp | 1 + .../AArch64/GISel/AArch64LegalizerInfo.cpp | 29 ++++++++++++++++++- llvm/test/CodeGen/AArch64/range-prefetch.ll | 28 ++++++++++++++++++ 19 files changed, 196 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/range-prefetch.ll diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def index adb6c941e852a..7bbf747d705c7 100644 --- a/clang/include/clang/Basic/BuiltinsAArch64.def +++ b/clang/include/clang/Basic/BuiltinsAArch64.def @@ -96,6 +96,9 @@ 
TARGET_BUILTIN(__builtin_arm_jcvt, "Zid", "nc", "v8.3a") // Prefetch BUILTIN(__builtin_arm_prefetch, "vvC*UiUiUiUi", "nc") +// Range Prefetch +BUILTIN(__builtin_arm_range_prefetch, "vvC*UiUiUiiUii", "nc") + // System Registers BUILTIN(__builtin_arm_rsr, "UicC*", "nc") BUILTIN(__builtin_arm_rsr64, "WUicC*", "nc") diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index d7f36c0f9b79a..38018953a269e 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -612,6 +612,9 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts, if (HasLSE) Builder.defineMacro("__ARM_FEATURE_ATOMICS", "1"); + if (HasRPRFM) + Builder.defineMacro("__ARM_FEATURE_RPRFM", "1"); + if (HasBFloat16) { Builder.defineMacro("__ARM_FEATURE_BF16", "1"); Builder.defineMacro("__ARM_FEATURE_BF16_VECTOR_ARITHMETIC", "1"); @@ -870,6 +873,7 @@ bool AArch64TargetInfo::hasFeature(StringRef Feature) const { .Case("ssve-fp8fma", HasSSVE_FP8FMA) .Case("sme-f8f32", HasSME_F8F32) .Case("sme-f8f16", HasSME_F8F16) + .Case("rprfm", HasRPRFM) .Default(false); } @@ -1100,6 +1104,9 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features, if (Feature == "+strict-align") HasUnalignedAccess = false; + if (Feature == "+rprfm") + HasRPRFM = true; + // All predecessor archs are added but select the latest one for ArchKind. 
if (Feature == "+v8a" && ArchInfo->Version < llvm::AArch64::ARMV8A.Version) ArchInfo = &llvm::AArch64::ARMV8A; diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h index 1a7aa658e9d87..866a9cb2c2711 100644 --- a/clang/lib/Basic/Targets/AArch64.h +++ b/clang/lib/Basic/Targets/AArch64.h @@ -131,6 +131,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo { bool HasRCPC3 = false; bool HasSMEFA64 = false; bool HasPAuthLR = false; + bool HasRPRFM = false; const llvm::AArch64::ArchInfo *ArchInfo = &llvm::AArch64::ARMV8A; diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h index 97f63e8ecf71f..4b6cd97be602a 100644 --- a/clang/lib/Headers/arm_acle.h +++ b/clang/lib/Headers/arm_acle.h @@ -98,6 +98,10 @@ __swp(uint32_t __x, volatile uint32_t *__p) { #else #define __pldx(access_kind, cache_level, retention_policy, addr) \ __builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1) +#define __rpld(access_kind, retention_policy, reuse_distance, stride, count, \ + length, addr) \ + __builtin_arm_range_prefetch(addr, access_kind, retention_policy, \ + reuse_distance, stride, count, length) #endif /* 7.6.2 Instruction prefetch */ diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp index a5164a94b57fa..da3438fb77118 100644 --- a/clang/lib/Sema/SemaARM.cpp +++ b/clang/lib/Sema/SemaARM.cpp @@ -1122,6 +1122,15 @@ bool SemaARM::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI, SemaRef.BuiltinConstantArgRange(TheCall, 4, 0, 1); } + if (BuiltinID == AArch64::BI__builtin_arm_range_prefetch) { + return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 1) || + SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 1) || + SemaRef.BuiltinConstantArgRange(TheCall, 3, 0, 15) || + SemaRef.BuiltinConstantArgRange(TheCall, 4, -2048, 2040) || + SemaRef.BuiltinConstantArgRange(TheCall, 5, 0, 65535) || + SemaRef.BuiltinConstantArgRange(TheCall, 6, -2048, 2040); + } + if (BuiltinID == 
AArch64::BI__builtin_arm_rsr64 || BuiltinID == AArch64::BI__builtin_arm_wsr64 || BuiltinID == AArch64::BI__builtin_arm_rsr128 || diff --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c index 0f539cba5c758..1f1c8b82c0ae1 100644 --- a/clang/test/CodeGen/arm_acle.c +++ b/clang/test/CodeGen/arm_acle.c @@ -164,6 +164,19 @@ void test_pld() { __pld(0); } +#if defined(__ARM_64BIT_STATE) + +// AArch64-LABEL: @test_rpld( +// AArch64-NEXT: entry: +// AArch64-NEXT: call void @llvm.aarch64.range.prefetch(ptr null, i32 1, i32 1, i32 15, i32 -2048, i32 65535, i32 2040) +// AArch64-NEXT: ret void +// +void test_rpld() { + __rpld(1, 1, 15, -2048, 65535, 2040, 0); +} + +#endif + // AArch32-LABEL: @test_pldx( // AArch32-NEXT: entry: // AArch32-NEXT: call void @llvm.prefetch.p0(ptr null, i32 1, i32 3, i32 1) diff --git a/clang/test/CodeGen/builtins-arm64.c b/clang/test/CodeGen/builtins-arm64.c index 86c2812434643..1262823bf6ed3 100644 --- a/clang/test/CodeGen/builtins-arm64.c +++ b/clang/test/CodeGen/builtins-arm64.c @@ -62,6 +62,20 @@ void prefetch(void) { // CHECK: call {{.*}} @llvm.aarch64.prefetch(ptr null, i32 0, i32 3, i32 0, i32 1) } +void range_prefetch(void) { + __builtin_arm_range_prefetch(0, 0, 0, 15, 1024, 24, 2); // pldkeep + // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, i32 15, i32 1024, i32 24, i32 2) + + __builtin_arm_range_prefetch(0, 0, 1, 15, 1024, 24, 2); // pldstrm + // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 1, i32 15, i32 1024, i32 24, i32 2) + + __builtin_arm_range_prefetch(0, 1, 0, 15, 1024, 24, 2); // pstkeep + // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 1, i32 0, i32 15, i32 1024, i32 24, i32 2) + + __builtin_arm_range_prefetch(0, 1, 1, 15, 1024, 24, 2); // pststrm + // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 1, i32 1, i32 15, i32 1024, i32 24, i32 2) +} + __attribute__((target("v8.5a"))) int32_t jcvt(double v) { //CHECK-LABEL: @jcvt( diff 
--git a/clang/test/Driver/print-supported-extensions-aarch64.c b/clang/test/Driver/print-supported-extensions-aarch64.c index 1f8929e705e4c..1a34478f11c6b 100644 --- a/clang/test/Driver/print-supported-extensions-aarch64.c +++ b/clang/test/Driver/print-supported-extensions-aarch64.c @@ -69,6 +69,7 @@ // CHECK-NEXT: rcpc FEAT_LRCPC Enable support for RCPC extension // CHECK-NEXT: rcpc3 FEAT_LRCPC3 Enable Armv8.9-A RCPC instructions for A64 and Advanced SIMD and floating-point instruction set // CHECK-NEXT: rdm FEAT_RDM Enable Armv8.1-A Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: rprfm FEAT_RPRFM Enable Armv8.0-A Range Prefetch Memory instruction // CHECK-NEXT: sb FEAT_SB Enable Armv8.5-A Speculation Barrier // CHECK-NEXT: sha2 FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support // CHECK-NEXT: sha3 FEAT_SHA3, FEAT_SHA512 Enable SHA512 and SHA3 support diff --git a/clang/test/Preprocessor/aarch64-target-features.c b/clang/test/Preprocessor/aarch64-target-features.c index 4dd243e57a63e..d5d78f1118a4f 100644 --- a/clang/test/Preprocessor/aarch64-target-features.c +++ b/clang/test/Preprocessor/aarch64-target-features.c @@ -789,3 +789,6 @@ // CHECK-SMEF8F16: __ARM_FEATURE_FP8 1 // CHECK-SMEF8F16: __ARM_FEATURE_SME2 1 // CHECK-SMEF8F16: __ARM_FEATURE_SME_F8F16 1 + +// RUN: %clang --target=aarch64 -march=armv8-a+rprfm -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-RPRFM %s +// CHECK-RPRFM: __ARM_FEATURE_RPRFM 1 diff --git a/clang/test/Sema/builtins-arm64.c b/clang/test/Sema/builtins-arm64.c index f094162b3aadc..3d26b16d461d0 100644 --- a/clang/test/Sema/builtins-arm64.c +++ b/clang/test/Sema/builtins-arm64.c @@ -30,6 +30,17 @@ void test_prefetch(void) { __builtin_arm_prefetch(0, 0, 0, 0, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}} } +void test_range_prefetch(void) { + __builtin_arm_range_prefetch(0, 2, 0, 0, 0, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}} + 
__builtin_arm_range_prefetch(0, 0, 2, 0, 0, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}} + __builtin_arm_range_prefetch(0, 0, 0, 16, 0, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}} + __builtin_arm_range_prefetch(0, 0, 0, 0, -2049, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}} + __builtin_arm_range_prefetch(0, 0, 0, 0, 2041, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}} + __builtin_arm_range_prefetch(0, 0, 0, 0, 0, 65536, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}} + __builtin_arm_range_prefetch(0, 0, 0, 0, 0, 0, -2049); // expected-error-re {{argument value {{.*}} is outside the valid range}} + __builtin_arm_range_prefetch(0, 0, 0, 0, 0, 0, 2041); // expected-error-re {{argument value {{.*}} is outside the valid range}} +} + void test_trap(short s, unsigned short us) { __builtin_arm_trap(42); __builtin_arm_trap(65535); @@ -37,4 +48,4 @@ void test_trap(short s, unsigned short us) { __builtin_arm_trap(65536); // expected-warning {{implicit conversion from 'int' to 'unsigned short' changes value from 65536 to 0}} __builtin_arm_trap(s); // expected-error {{argument to '__builtin_arm_trap' must be a constant integer}} __builtin_arm_trap(us); // expected-error {{argument to '__builtin_arm_trap' must be a constant integer}} -} \ No newline at end of file +} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 1c86c6815f049..43a7f10ce2618 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -76,6 +76,14 @@ def int_aarch64_prefetch : Intrinsic<[], ]>, ClangBuiltin<"__builtin_arm_prefetch">; +def int_aarch64_range_prefetch : Intrinsic<[], + [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], + [IntrInaccessibleMemOrArgMemOnly, IntrWillReturn, 
ReadOnly<ArgIndex<0>>, + ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, + ImmArg<ArgIndex<5>>, ImmArg<ArgIndex<6>>]>, + ClangBuiltin<"__builtin_arm_range_prefetch">; + //===----------------------------------------------------------------------===// // Data Barrier Instructions diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index a1e14d8f25bf7..17c4bfd67b4c0 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -6701,6 +6701,27 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { "isdata argument to llvm.aarch64.prefetch must be 0 or 1", Call); break; } + case Intrinsic::aarch64_range_prefetch: { + Check(cast<ConstantInt>(Call.getArgOperand(1))->getZExtValue() < 2, + "write argument to llvm.aarch64.range.prefetch must be 0 or 1", Call); + Check(cast<ConstantInt>(Call.getArgOperand(2))->getZExtValue() < 2, + "stream argument to llvm.aarch64.range.prefetch must be 0 or 1", + Call); + Check(cast<ConstantInt>(Call.getArgOperand(3))->getZExtValue() < 16, + "reuse distance argument to llvm.aarch64.range.prefetch must be < 16", + Call); + int Stride = cast<ConstantInt>(Call.getArgOperand(4))->getZExtValue(); + Check(Stride > -2049 && Stride < 2041, + "stride argument to llvm.aarch64.range.prefetch must be -2048 - 2040", + Call); + Check(cast<ConstantInt>(Call.getArgOperand(5))->getZExtValue() < 65536, + "count argument to llvm.aarch64.range.prefetch must be < 65536"); + int Length = cast<ConstantInt>(Call.getArgOperand(6))->getZExtValue(); + Check(Length > -2049 && Length < 2041, + "length argument to llvm.aarch64.range.prefetch must be -2048 -" + "2040"); + break; + } case Intrinsic::callbr_landingpad: { const auto *CBR = dyn_cast<CallBrInst>(Call.getOperand(0)); Check(CBR, "intrinstic requires callbr operand", &Call); diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td index 066724bea92c9..e643bdf6fea74 100644 --- 
a/llvm/lib/Target/AArch64/AArch64Features.td +++ b/llvm/lib/Target/AArch64/AArch64Features.td @@ -101,6 +101,9 @@ def FeaturePerfMon : ExtensionWithMArch<"perfmon", "PerfMon", "FEAT_PMUv3", def FeatureSpecRestrict : Extension<"specrestrict", "SpecRestrict", "FEAT_CSV2_2", "Enable architectural speculation restriction">; +def FeatureRPRFM : ExtensionWithMArch<"rprfm", "RPRFM", "FEAT_RPRFM", + "Enable Armv8.0-A Range Prefetch Memory instruction">; + //===----------------------------------------------------------------------===// // Armv8.1 Architecture Extensions //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 2ce8f6d924a78..dd6248afe9358 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -6161,6 +6161,29 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_VOID(SDValue Op, return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Chain, DAG.getTargetConstant(PrfOp, DL, MVT::i32), Addr); } + case Intrinsic::aarch64_range_prefetch: { + SDValue Chain = Op.getOperand(0); + SDValue Addr = Op.getOperand(2); + + unsigned IsWrite = Op.getConstantOperandVal(3); + unsigned IsStream = Op.getConstantOperandVal(4); + unsigned PrfOp = (IsStream << 2) | IsWrite; + + uint64_t Distance = Op.getConstantOperandVal(5); + int64_t Stride = Op.getConstantOperandVal(6); + uint64_t Count = Op.getConstantOperandVal(7); + int64_t Length = Op.getConstantOperandVal(8); + uint64_t Mask22 = (1ULL << 22) - 1; + uint64_t Mask16 = (1ULL << 16) - 1; + uint64_t Metadata = (Distance << 60) | + ((Stride & Mask22) << 38) | + ((Count & Mask16) << 22) | + (Length & Mask22); + + return DAG.getNode(AArch64ISD::RANGE_PREFETCH, DL, MVT::Other, Chain, + DAG.getTargetConstant(PrfOp, DL, MVT::i32), Addr, + DAG.getConstant(Metadata, DL, MVT::i64)); + } case Intrinsic::aarch64_sme_str: case 
Intrinsic::aarch64_sme_ldr: { return LowerSMELdrStr(Op, DAG, IntNo == Intrinsic::aarch64_sme_ldr); diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td index 7d99786830e3d..c40a9e34b37a2 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td +++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td @@ -192,6 +192,12 @@ def G_AARCH64_PREFETCH : AArch64GenericInstruction { let hasSideEffects = 1; } +def G_AARCH64_RANGE_PREFETCH : AArch64GenericInstruction { + let OutOperandList = (outs); + let InOperandList = (ins type0:$imm, ptype0:$src1, type1:$src2); + let hasSideEffects = 1; +} + def G_UMULL : AArch64GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); @@ -303,6 +309,7 @@ def : GINodeEquiv<G_USDOT, AArch64usdot>; def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>; def : GINodeEquiv<G_AARCH64_PREFETCH, AArch64Prefetch>; +def : GINodeEquiv<G_AARCH64_RANGE_PREFETCH, AArch64RangePrefetch>; def : GINodeEquiv<G_FPTRUNC_ODD, AArch64fcvtxn_n>; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index da93a2b13fc11..0007ddba3d941 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -411,6 +411,8 @@ def HasS1POE2 : Predicate<"Subtarget->hasS1POE2()">, AssemblerPredicateWithAll<(all_of FeatureS1POE2), "poe2">; def HasTEV : Predicate<"Subtarget->hasTEV()">, AssemblerPredicateWithAll<(all_of FeatureTEV), "tev">; +def HasRPRFM : Predicate<"Subtarget->hasRPRFM()">, + AssemblerPredicateWithAll<(all_of FeatureRPRFM), "rprfm">; def IsLE : Predicate<"Subtarget->isLittleEndian()">; def IsBE : Predicate<"!Subtarget->isLittleEndian()">; def IsWindows : Predicate<"Subtarget->isTargetWindows()">; @@ -536,6 +538,7 @@ def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,3>]>; def SDT_AArch64TCRET : SDTypeProfile<0, 2, 
[SDTCisPtrTy<0>]>; def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>; +def SDT_AArch64RANGE_PREFETCH: SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisPtrTy<2>]>; def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>; @@ -1038,6 +1041,10 @@ def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET, def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH, [SDNPHasChain, SDNPSideEffect]>; +def AArch64RangePrefetch: SDNode<"AArch64ISD::RANGE_PREFETCH", + SDT_AArch64RANGE_PREFETCH, + [SDNPHasChain, SDNPSideEffect]>; + // {s|u}int to FP within a FP register. def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>; def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>; @@ -10980,6 +10987,9 @@ def RPRFM: let DecoderNamespace = "Fallback"; } +def : Pat<(AArch64RangePrefetch rprfop:$Rt, GPR64sp:$Rn, GPR64:$Rm), + (RPRFM rprfop:$Rt, GPR64:$Rm, GPR64sp:$Rn)>; + //===----------------------------------------------------------------------===// // 128-bit Atomics (FEAT_LSE128) //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 433cb0387c470..09075d7fac90a 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -3954,6 +3954,7 @@ static const struct Extension { {"poe2", {AArch64::FeatureS1POE2}}, {"tev", {AArch64::FeatureTEV}}, {"btie", {AArch64::FeatureBTIE}}, + {"rprfm", {AArch64::FeatureRPRFM}}, }; static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) { diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 1025b2502211a..dad362785ba3f 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ 
b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -1749,6 +1749,33 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, MI.eraseFromParent(); return true; } + case Intrinsic::aarch64_range_prefetch: { + auto &AddrVal = MI.getOperand(1); + + int64_t IsWrite = MI.getOperand(2).getImm(); + int64_t IsStream = MI.getOperand(3).getImm(); + unsigned PrfOp = (IsStream << 2) | IsWrite; + + int64_t Distance = MI.getOperand(4).getImm(); + int64_t Stride = MI.getOperand(5).getImm(); + int64_t Count = MI.getOperand(6).getImm(); + int64_t Length = MI.getOperand(7).getImm(); + uint64_t Mask22 = (1ULL << 22) - 1; + uint64_t Mask16 = (1ULL << 16) - 1; + uint64_t Metadata = (Distance << 60) | + ((Stride & Mask22) << 38) | + ((Count & Mask16) << 22) | + (Length & Mask22); + + auto MetadataReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); + MIB.buildConstant(MetadataReg, Metadata); + MIB.buildInstr(AArch64::G_AARCH64_RANGE_PREFETCH) + .addImm(PrfOp) + .add(AddrVal) + .addUse(MetadataReg); + MI.eraseFromParent(); + return true; + } case Intrinsic::aarch64_neon_uaddv: case Intrinsic::aarch64_neon_saddv: case Intrinsic::aarch64_neon_umaxv: @@ -2506,4 +2533,4 @@ bool AArch64LegalizerInfo::legalizeFptrunc(MachineInstr &MI, MRI.replaceRegWith(Dst, Fin); MI.eraseFromParent(); return true; -} \ No newline at end of file +} diff --git a/llvm/test/CodeGen/AArch64/range-prefetch.ll b/llvm/test/CodeGen/AArch64/range-prefetch.ll new file mode 100644 index 0000000000000..a010346d58979 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/range-prefetch.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=aarch64 -mattr=+v8.9a --global-isel=0 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64 -mattr=+v8.9a --global-isel=1 --global-isel-abort=1 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64 -mattr=+v8.9a -mattr=+rprfm --global-isel=0 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64 
-mattr=+v8.9a -mattr=+rprfm --global-isel=1 --global-isel-abort=1 < %s | FileCheck %s + +define void @test(ptr %a, ptr %b) { +; CHECK-LABEL: test: +; CHECK: mov x8, #-562675075514369 +; CHECK-NEXT: mov x9, #4192256 +; CHECK-NEXT: movk x8, #2040 +; CHECK-NEXT: orr x9, x9, #0x1fe0000000000 +; CHECK-NEXT: movk x8, #65472, lsl #16 + +; CHECK-NEXT: rprfm pldkeep, x8, [x0] + call void @llvm.aarch64.range.prefetch(ptr %a, i32 0, i32 0, i32 15, i32 -2048, i32 65535, i32 2040) + +; CHECK-NEXT: rprfm pstkeep, x8, [x1] + call void @llvm.aarch64.range.prefetch(ptr %b, i32 1, i32 0, i32 15, i32 -2048, i32 65535, i32 2040) + +; CHECK-NEXT: rprfm pldstrm, x9, [x0] + call void @llvm.aarch64.range.prefetch(ptr %a, i32 0, i32 1, i32 0, i32 2040, i32 0, i32 -2048) + +; CHECK-NEXT: rprfm pststrm, x9, [x1] + call void @llvm.aarch64.range.prefetch(ptr %b, i32 1, i32 1, i32 0, i32 2040, i32 0, i32 -2048) + + ret void +} >From ed82848f29f19720c19ddbd09b57dd135dc5a43a Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin <[email protected]> Date: Mon, 8 Dec 2025 11:49:25 +0000 Subject: [PATCH 2/3] - Change range of Count argument (1 to 65536) and subtract 1 during lowering - Add Verifier tests - Run clang-format --- clang/lib/Sema/SemaARM.cpp | 2 +- clang/test/CodeGen/arm_acle.c | 4 +- clang/test/Sema/builtins-arm64.c | 2 +- llvm/lib/IR/Verifier.cpp | 11 +++-- .../Target/AArch64/AArch64ISelLowering.cpp | 8 ++-- .../AArch64/GISel/AArch64LegalizerInfo.cpp | 8 ++-- llvm/test/CodeGen/AArch64/range-prefetch.ll | 8 ++-- .../test/Verifier/AArch64/intrinsic-immarg.ll | 41 +++++++++++++++++++ 8 files changed, 62 insertions(+), 22 deletions(-) create mode 100644 llvm/test/Verifier/AArch64/intrinsic-immarg.ll diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp index da3438fb77118..2f565773c43ae 100644 --- a/clang/lib/Sema/SemaARM.cpp +++ b/clang/lib/Sema/SemaARM.cpp @@ -1127,7 +1127,7 @@ bool SemaARM::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI, 
SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 1) || SemaRef.BuiltinConstantArgRange(TheCall, 3, 0, 15) || SemaRef.BuiltinConstantArgRange(TheCall, 4, -2048, 2040) || - SemaRef.BuiltinConstantArgRange(TheCall, 5, 0, 65535) || + SemaRef.BuiltinConstantArgRange(TheCall, 5, 1, 65536) || SemaRef.BuiltinConstantArgRange(TheCall, 6, -2048, 2040); } diff --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c index 1f1c8b82c0ae1..dd0713e69666f 100644 --- a/clang/test/CodeGen/arm_acle.c +++ b/clang/test/CodeGen/arm_acle.c @@ -168,11 +168,11 @@ void test_pld() { // AArch64-LABEL: @test_rpld( // AArch64-NEXT: entry: -// AArch64-NEXT: call void @llvm.aarch64.range.prefetch(ptr null, i32 1, i32 1, i32 15, i32 -2048, i32 65535, i32 2040) +// AArch64-NEXT: call void @llvm.aarch64.range.prefetch(ptr null, i32 1, i32 1, i32 15, i32 -2048, i32 65536, i32 2040) // AArch64-NEXT: ret void // void test_rpld() { - __rpld(1, 1, 15, -2048, 65535, 2040, 0); + __rpld(1, 1, 15, -2048, 65536, 2040, 0); } #endif diff --git a/clang/test/Sema/builtins-arm64.c b/clang/test/Sema/builtins-arm64.c index 3d26b16d461d0..441f91dc4e194 100644 --- a/clang/test/Sema/builtins-arm64.c +++ b/clang/test/Sema/builtins-arm64.c @@ -36,7 +36,7 @@ void test_range_prefetch(void) { __builtin_arm_range_prefetch(0, 0, 0, 16, 0, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}} __builtin_arm_range_prefetch(0, 0, 0, 0, -2049, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}} __builtin_arm_range_prefetch(0, 0, 0, 0, 2041, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}} - __builtin_arm_range_prefetch(0, 0, 0, 0, 0, 65536, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}} + __builtin_arm_range_prefetch(0, 0, 0, 0, 0, 65537, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}} __builtin_arm_range_prefetch(0, 0, 0, 0, 0, 0, -2049); // expected-error-re {{argument 
value {{.*}} is outside the valid range}} __builtin_arm_range_prefetch(0, 0, 0, 0, 0, 0, 2041); // expected-error-re {{argument value {{.*}} is outside the valid range}} } diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 17c4bfd67b4c0..ae7da03f82025 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -6714,12 +6714,15 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { Check(Stride > -2049 && Stride < 2041, "stride argument to llvm.aarch64.range.prefetch must be -2048 - 2040", Call); - Check(cast<ConstantInt>(Call.getArgOperand(5))->getZExtValue() < 65536, - "count argument to llvm.aarch64.range.prefetch must be < 65536"); + int Count = cast<ConstantInt>(Call.getArgOperand(5))->getZExtValue(); + Check(Count > 0 && Count < 65537, + "count argument to llvm.aarch64.range.prefetch must be < 65537", + Call); int Length = cast<ConstantInt>(Call.getArgOperand(6))->getZExtValue(); Check(Length > -2049 && Length < 2041, - "length argument to llvm.aarch64.range.prefetch must be -2048 -" - "2040"); + "length argument to llvm.aarch64.range.prefetch must be -2048 - " + "2040", + Call); break; } case Intrinsic::callbr_landingpad: { diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index dd6248afe9358..b7813695facfa 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -6171,14 +6171,12 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_VOID(SDValue Op, uint64_t Distance = Op.getConstantOperandVal(5); int64_t Stride = Op.getConstantOperandVal(6); - uint64_t Count = Op.getConstantOperandVal(7); + uint64_t Count = Op.getConstantOperandVal(7) - 1; int64_t Length = Op.getConstantOperandVal(8); uint64_t Mask22 = (1ULL << 22) - 1; uint64_t Mask16 = (1ULL << 16) - 1; - uint64_t Metadata = (Distance << 60) | - ((Stride & Mask22) << 38) | - ((Count & Mask16) << 22) | - (Length & Mask22); + uint64_t 
Metadata = (Distance << 60) | ((Stride & Mask22) << 38) | + ((Count & Mask16) << 22) | (Length & Mask22); return DAG.getNode(AArch64ISD::RANGE_PREFETCH, DL, MVT::Other, Chain, DAG.getTargetConstant(PrfOp, DL, MVT::i32), Addr, diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index dad362785ba3f..9ed8ed5c53e2f 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -1758,14 +1758,12 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, int64_t Distance = MI.getOperand(4).getImm(); int64_t Stride = MI.getOperand(5).getImm(); - int64_t Count = MI.getOperand(6).getImm(); + int64_t Count = MI.getOperand(6).getImm() - 1; int64_t Length = MI.getOperand(7).getImm(); uint64_t Mask22 = (1ULL << 22) - 1; uint64_t Mask16 = (1ULL << 16) - 1; - uint64_t Metadata = (Distance << 60) | - ((Stride & Mask22) << 38) | - ((Count & Mask16) << 22) | - (Length & Mask22); + uint64_t Metadata = (Distance << 60) | ((Stride & Mask22) << 38) | + ((Count & Mask16) << 22) | (Length & Mask22); auto MetadataReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); MIB.buildConstant(MetadataReg, Metadata); diff --git a/llvm/test/CodeGen/AArch64/range-prefetch.ll b/llvm/test/CodeGen/AArch64/range-prefetch.ll index a010346d58979..973a4e8684c48 100644 --- a/llvm/test/CodeGen/AArch64/range-prefetch.ll +++ b/llvm/test/CodeGen/AArch64/range-prefetch.ll @@ -13,16 +13,16 @@ define void @test(ptr %a, ptr %b) { ; CHECK-NEXT: movk x8, #65472, lsl #16 ; CHECK-NEXT: rprfm pldkeep, x8, [x0] - call void @llvm.aarch64.range.prefetch(ptr %a, i32 0, i32 0, i32 15, i32 -2048, i32 65535, i32 2040) + call void @llvm.aarch64.range.prefetch(ptr %a, i32 0, i32 0, i32 15, i32 -2048, i32 65536, i32 2040) ; CHECK-NEXT: rprfm pstkeep, x8, [x1] - call void @llvm.aarch64.range.prefetch(ptr %b, i32 1, i32 0, i32 15, i32 -2048, i32 65535, i32 2040) + call void 
@llvm.aarch64.range.prefetch(ptr %b, i32 1, i32 0, i32 15, i32 -2048, i32 65536, i32 2040) ; CHECK-NEXT: rprfm pldstrm, x9, [x0] - call void @llvm.aarch64.range.prefetch(ptr %a, i32 0, i32 1, i32 0, i32 2040, i32 0, i32 -2048) + call void @llvm.aarch64.range.prefetch(ptr %a, i32 0, i32 1, i32 0, i32 2040, i32 1, i32 -2048) ; CHECK-NEXT: rprfm pststrm, x9, [x1] - call void @llvm.aarch64.range.prefetch(ptr %b, i32 1, i32 1, i32 0, i32 2040, i32 0, i32 -2048) + call void @llvm.aarch64.range.prefetch(ptr %b, i32 1, i32 1, i32 0, i32 2040, i32 1, i32 -2048) ret void } diff --git a/llvm/test/Verifier/AArch64/intrinsic-immarg.ll b/llvm/test/Verifier/AArch64/intrinsic-immarg.ll new file mode 100644 index 0000000000000..cbaf285efc156 --- /dev/null +++ b/llvm/test/Verifier/AArch64/intrinsic-immarg.ll @@ -0,0 +1,41 @@ +; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s + +define void @range_prefetch(ptr %src) { + ; CHECK: write argument to llvm.aarch64.range.prefetch must be 0 or 1 + ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch(ptr %src, i32 2, i32 0, i32 0, i32 0, i32 1, i32 0) + call void @llvm.aarch64.range.prefetch(ptr %src, i32 2, i32 0, i32 0, i32 0, i32 1, i32 0) + + ; CHECK-NEXT: stream argument to llvm.aarch64.range.prefetch must be 0 or 1 + ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 2, i32 0, i32 0, i32 1, i32 0) + call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 2, i32 0, i32 0, i32 1, i32 0) + + ; CHECK-NEXT: reuse distance argument to llvm.aarch64.range.prefetch must be < 16 + ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 16, i32 0, i32 1, i32 0) + call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 16, i32 0, i32 1, i32 0) + + ; CHECK-NEXT: stride argument to llvm.aarch64.range.prefetch must be -2048 - 2040 + ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 0, i32 -2049, i32 1, i32 0) + call void @llvm.aarch64.range.prefetch(ptr 
%src, i32 0, i32 0, i32 0, i32 -2049, i32 1, i32 0) + + ; CHECK-NEXT: stride argument to llvm.aarch64.range.prefetch must be -2048 - 2040 + ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 0, i32 2041, i32 1, i32 0) + call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 0, i32 2041, i32 1, i32 0) + + ; CHECK-NEXT: count argument to llvm.aarch64.range.prefetch must be < 65537 + ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) + call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) + + ; CHECK-NEXT: count argument to llvm.aarch64.range.prefetch must be < 65537 + ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 0, i32 0, i32 65537, i32 0) + call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 0, i32 0, i32 65537, i32 0) + + ; CHECK-NEXT: length argument to llvm.aarch64.range.prefetch must be -2048 - 2040 + ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 0, i32 0, i32 1, i32 -2049) + call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 0, i32 0, i32 1, i32 -2049) + + ; CHECK-NEXT: length argument to llvm.aarch64.range.prefetch must be -2048 - 2040 + ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 0, i32 0, i32 1, i32 2041) + call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 0, i32 0, i32 1, i32 2041) + + ret void +} >From 8d46a48889491d8011e102ac1af933f6676725b9 Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin <[email protected]> Date: Wed, 10 Dec 2025 11:42:24 +0000 Subject: [PATCH 3/3] - Rename __rpld to __pldx_range - Add second intrinsic (__pld_range) which takes a single metadata value - Reorder the arguments of __pldx_range - Removed RPRFM feature macro & added __ARM_PREFETCH_RANGE - Map ReuseDistance to decreasing powers of two from 512MiB-32KiB --- 
clang/include/clang/Basic/BuiltinsAArch64.def | 3 +- clang/lib/Basic/Targets/AArch64.cpp | 10 +-- clang/lib/Basic/Targets/AArch64.h | 1 - clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 10 +++ clang/lib/Headers/arm_acle.h | 10 ++- clang/lib/Sema/SemaARM.cpp | 14 +-- clang/test/CodeGen/arm_acle.c | 19 ++-- clang/test/CodeGen/builtins-arm64.c | 28 ++++-- .../print-supported-extensions-aarch64.c | 1 - .../Preprocessor/aarch64-target-features.c | 4 +- clang/test/Preprocessor/init-aarch64.c | 1 + clang/test/Sema/builtins-arm64.c | 18 ++-- llvm/include/llvm/IR/IntrinsicsAArch64.td | 11 ++- llvm/lib/IR/Verifier.cpp | 34 +++++--- llvm/lib/Target/AArch64/AArch64Features.td | 3 - .../Target/AArch64/AArch64ISelLowering.cpp | 38 ++++++-- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 2 - .../AArch64/AsmParser/AArch64AsmParser.cpp | 1 - .../AArch64/GISel/AArch64LegalizerInfo.cpp | 39 +++++++-- llvm/test/CodeGen/AArch64/range-prefetch.ll | 86 +++++++++++++++---- .../test/Verifier/AArch64/intrinsic-immarg.ll | 80 +++++++++-------- 21 files changed, 285 insertions(+), 128 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def index 7bbf747d705c7..daac7776168f7 100644 --- a/clang/include/clang/Basic/BuiltinsAArch64.def +++ b/clang/include/clang/Basic/BuiltinsAArch64.def @@ -97,7 +97,8 @@ TARGET_BUILTIN(__builtin_arm_jcvt, "Zid", "nc", "v8.3a") BUILTIN(__builtin_arm_prefetch, "vvC*UiUiUiUi", "nc") // Range Prefetch -BUILTIN(__builtin_arm_range_prefetch, "vvC*UiUiUiiUii", "nc") +TARGET_HEADER_BUILTIN(__builtin_arm_range_prefetch_x, "vvC*UiUiiUiiz", "n", ARMACLE_H, ALL_LANGUAGES, "") +TARGET_HEADER_BUILTIN(__builtin_arm_range_prefetch, "vvC*UiUiULLi", "n", ARMACLE_H, ALL_LANGUAGES, "") // System Registers BUILTIN(__builtin_arm_rsr, "UicC*", "nc") diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index 38018953a269e..8cb8f4debc2e9 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ 
b/clang/lib/Basic/Targets/AArch64.cpp @@ -474,6 +474,9 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__ARM_SIZEOF_MINIMAL_ENUM", Opts.ShortEnums ? "1" : "4"); + // Clang supports range prefetch intrinsics + Builder.defineMacro("__ARM_PREFETCH_RANGE", "1"); + if (FPU & NeonMode) { Builder.defineMacro("__ARM_NEON", "1"); // 64-bit NEON supports half, single and double precision operations. @@ -612,9 +615,6 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts, if (HasLSE) Builder.defineMacro("__ARM_FEATURE_ATOMICS", "1"); - if (HasRPRFM) - Builder.defineMacro("__ARM_FEATURE_RPRFM", "1"); - if (HasBFloat16) { Builder.defineMacro("__ARM_FEATURE_BF16", "1"); Builder.defineMacro("__ARM_FEATURE_BF16_VECTOR_ARITHMETIC", "1"); @@ -873,7 +873,6 @@ bool AArch64TargetInfo::hasFeature(StringRef Feature) const { .Case("ssve-fp8fma", HasSSVE_FP8FMA) .Case("sme-f8f32", HasSME_F8F32) .Case("sme-f8f16", HasSME_F8F16) - .Case("rprfm", HasRPRFM) .Default(false); } @@ -1104,9 +1103,6 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features, if (Feature == "+strict-align") HasUnalignedAccess = false; - if (Feature == "+rprfm") - HasRPRFM = true; - // All predecessor archs are added but select the latest one for ArchKind. 
if (Feature == "+v8a" && ArchInfo->Version < llvm::AArch64::ARMV8A.Version) ArchInfo = &llvm::AArch64::ARMV8A; diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h index 866a9cb2c2711..1a7aa658e9d87 100644 --- a/clang/lib/Basic/Targets/AArch64.h +++ b/clang/lib/Basic/Targets/AArch64.h @@ -131,7 +131,6 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo { bool HasRCPC3 = false; bool HasSMEFA64 = false; bool HasPAuthLR = false; - bool HasRPRFM = false; const llvm::AArch64::ArchInfo *ArchInfo = &llvm::AArch64::ARMV8A; diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp index d4b0b81d3d87f..55a9e47b643ff 100644 --- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp @@ -5415,6 +5415,16 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size}); } + if (BuiltinID == AArch64::BI__builtin_arm_range_prefetch_x) { + SmallVector<llvm::Value *, 16> Ops; + for (unsigned I = 0; I < 6; ++I) + Ops.push_back(EmitScalarExpr(E->getArg(I))); + Ops.push_back( + Builder.CreateIntCast(EmitScalarExpr(E->getArg(6)), Int64Ty, false)); + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::aarch64_range_prefetch_imm), Ops); + } + // Memory Tagging Extensions (MTE) Intrinsics Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic; switch (BuiltinID) { diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h index 4b6cd97be602a..622e8f3d6aa7b 100644 --- a/clang/lib/Headers/arm_acle.h +++ b/clang/lib/Headers/arm_acle.h @@ -98,10 +98,12 @@ __swp(uint32_t __x, volatile uint32_t *__p) { #else #define __pldx(access_kind, cache_level, retention_policy, addr) \ __builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1) -#define __rpld(access_kind, retention_policy, reuse_distance, stride, count, \ - length, addr) \ - __builtin_arm_range_prefetch(addr, 
access_kind, retention_policy, \ - reuse_distance, stride, count, length) +#define __pldx_range(access_kind, retention_policy, length, count, stride, \ + reuse_distance, addr) \ + __builtin_arm_range_prefetch_x(addr, access_kind, retention_policy, length, \ + count, stride, reuse_distance) +#define __pld_range(access_kind, retention_policy, metadata, addr) \ + __builtin_arm_range_prefetch(addr, access_kind, retention_policy, metadata) #endif /* 7.6.2 Instruction prefetch */ diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp index 2f565773c43ae..c898288f1bf97 100644 --- a/clang/lib/Sema/SemaARM.cpp +++ b/clang/lib/Sema/SemaARM.cpp @@ -1122,13 +1122,17 @@ bool SemaARM::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI, SemaRef.BuiltinConstantArgRange(TheCall, 4, 0, 1); } - if (BuiltinID == AArch64::BI__builtin_arm_range_prefetch) { + if (BuiltinID == AArch64::BI__builtin_arm_range_prefetch_x) { return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 1) || SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 1) || - SemaRef.BuiltinConstantArgRange(TheCall, 3, 0, 15) || - SemaRef.BuiltinConstantArgRange(TheCall, 4, -2048, 2040) || - SemaRef.BuiltinConstantArgRange(TheCall, 5, 1, 65536) || - SemaRef.BuiltinConstantArgRange(TheCall, 6, -2048, 2040); + SemaRef.BuiltinConstantArgRange(TheCall, 3, -2048, 2040) || + SemaRef.BuiltinConstantArgRange(TheCall, 4, 1, 65536) || + SemaRef.BuiltinConstantArgRange(TheCall, 5, -2048, 2040); + } + + if (BuiltinID == AArch64::BI__builtin_arm_range_prefetch) { + return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 1) || + SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 1); } if (BuiltinID == AArch64::BI__builtin_arm_rsr64 || diff --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c index dd0713e69666f..5a9092efd8aab 100644 --- a/clang/test/CodeGen/arm_acle.c +++ b/clang/test/CodeGen/arm_acle.c @@ -164,15 +164,24 @@ void test_pld() { __pld(0); } -#if defined(__ARM_64BIT_STATE) +#if defined(__ARM_64BIT_STATE) 
&& defined(__ARM_PREFETCH_RANGE) -// AArch64-LABEL: @test_rpld( +// AArch64-LABEL: @test_pld_range( // AArch64-NEXT: entry: -// AArch64-NEXT: call void @llvm.aarch64.range.prefetch(ptr null, i32 1, i32 1, i32 15, i32 -2048, i32 65536, i32 2040) +// AArch64-NEXT: call void @llvm.aarch64.range.prefetch.reg(ptr null, i32 0, i32 1, i64 [[MD:%.*]]) // AArch64-NEXT: ret void // -void test_rpld() { - __rpld(1, 1, 15, -2048, 65536, 2040, 0); +void test_pld_range(unsigned long md) { + __pld_range(0, 1, md, 0); +} + +// AArch64-LABEL: @test_pldx_range( +// AArch64-NEXT: entry: +// AArch64-NEXT: call void @llvm.aarch64.range.prefetch.imm(ptr null, i32 0, i32 1, i32 2040, i32 65536, i32 -2048, i64 15) +// AArch64-NEXT: ret void +// +void test_pldx_range() { + __pldx_range(0, 1, 2040, 65536, -2048, 15, 0); } #endif diff --git a/clang/test/CodeGen/builtins-arm64.c b/clang/test/CodeGen/builtins-arm64.c index 1262823bf6ed3..3fff37540e09f 100644 --- a/clang/test/CodeGen/builtins-arm64.c +++ b/clang/test/CodeGen/builtins-arm64.c @@ -63,17 +63,29 @@ void prefetch(void) { } void range_prefetch(void) { - __builtin_arm_range_prefetch(0, 0, 0, 15, 1024, 24, 2); // pldkeep - // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, i32 15, i32 1024, i32 24, i32 2) + __builtin_arm_range_prefetch(0, 0, 0, 0); // pldkeep + // CHECK: call {{.*}} @llvm.aarch64.range.prefetch.reg(ptr null, i32 0, i32 0, i64 0) - __builtin_arm_range_prefetch(0, 0, 1, 15, 1024, 24, 2); // pldstrm - // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 1, i32 15, i32 1024, i32 24, i32 2) + __builtin_arm_range_prefetch(0, 0, 1, 0); // pldstrm + // CHECK: call {{.*}} @llvm.aarch64.range.prefetch.reg(ptr null, i32 0, i32 1, i64 0) - __builtin_arm_range_prefetch(0, 1, 0, 15, 1024, 24, 2); // pstkeep - // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 1, i32 0, i32 15, i32 1024, i32 24, i32 2) + __builtin_arm_range_prefetch(0, 1, 0, 0); // pstkeep + // CHECK: call 
{{.*}} @llvm.aarch64.range.prefetch.reg(ptr null, i32 1, i32 0, i64 0) - __builtin_arm_range_prefetch(0, 1, 1, 15, 1024, 24, 2); // pststrm - // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 1, i32 1, i32 15, i32 1024, i32 24, i32 2) + __builtin_arm_range_prefetch(0, 1, 1, 0); // pststrm + // CHECK: call {{.*}} @llvm.aarch64.range.prefetch.reg(ptr null, i32 1, i32 1, i64 0) + + __builtin_arm_range_prefetch_x(0, 0, 0, 2, 24, 1024, 15); // pldkeep + // CHECK: call {{.*}} @llvm.aarch64.range.prefetch.imm(ptr null, i32 0, i32 0, i32 2, i32 24, i32 1024, i64 15) + + __builtin_arm_range_prefetch_x(0, 0, 1, 2, 24, 1024, 15); // pldstrm + // CHECK: call {{.*}} @llvm.aarch64.range.prefetch.imm(ptr null, i32 0, i32 1, i32 2, i32 24, i32 1024, i64 15) + + __builtin_arm_range_prefetch_x(0, 1, 0, 2, 24, 1024, 15); // pstkeep + // CHECK: call {{.*}} @llvm.aarch64.range.prefetch.imm(ptr null, i32 1, i32 0, i32 2, i32 24, i32 1024, i64 15) + + __builtin_arm_range_prefetch_x(0, 1, 1, 2, 24, 1024, 15); // pststrm + // CHECK: call {{.*}} @llvm.aarch64.range.prefetch.imm(ptr null, i32 1, i32 1, i32 2, i32 24, i32 1024, i64 15) } __attribute__((target("v8.5a"))) diff --git a/clang/test/Driver/print-supported-extensions-aarch64.c b/clang/test/Driver/print-supported-extensions-aarch64.c index 1a34478f11c6b..1f8929e705e4c 100644 --- a/clang/test/Driver/print-supported-extensions-aarch64.c +++ b/clang/test/Driver/print-supported-extensions-aarch64.c @@ -69,7 +69,6 @@ // CHECK-NEXT: rcpc FEAT_LRCPC Enable support for RCPC extension // CHECK-NEXT: rcpc3 FEAT_LRCPC3 Enable Armv8.9-A RCPC instructions for A64 and Advanced SIMD and floating-point instruction set // CHECK-NEXT: rdm FEAT_RDM Enable Armv8.1-A Rounding Double Multiply Add/Subtract instructions -// CHECK-NEXT: rprfm FEAT_RPRFM Enable Armv8.0-A Range Prefetch Memory instruction // CHECK-NEXT: sb FEAT_SB Enable Armv8.5-A Speculation Barrier // CHECK-NEXT: sha2 FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support // 
CHECK-NEXT: sha3 FEAT_SHA3, FEAT_SHA512 Enable SHA512 and SHA3 support diff --git a/clang/test/Preprocessor/aarch64-target-features.c b/clang/test/Preprocessor/aarch64-target-features.c index d5d78f1118a4f..137840f6d2864 100644 --- a/clang/test/Preprocessor/aarch64-target-features.c +++ b/clang/test/Preprocessor/aarch64-target-features.c @@ -41,6 +41,7 @@ // CHECK: __ARM_NEON_FP 0xE // CHECK: __ARM_NEON_SVE_BRIDGE 1 // CHECK: __ARM_PCS_AAPCS64 1 +// CHECK: __ARM_PREFETCH_RANGE 1 // CHECK-NOT: __ARM_PCS 1 // CHECK-NOT: __ARM_PCS_VFP 1 // CHECK-NOT: __ARM_SIZEOF_MINIMAL_ENUM 1 @@ -789,6 +790,3 @@ // CHECK-SMEF8F16: __ARM_FEATURE_FP8 1 // CHECK-SMEF8F16: __ARM_FEATURE_SME2 1 // CHECK-SMEF8F16: __ARM_FEATURE_SME_F8F16 1 - -// RUN: %clang --target=aarch64 -march=armv8-a+rprfm -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-RPRFM %s -// CHECK-RPRFM: __ARM_FEATURE_RPRFM 1 diff --git a/clang/test/Preprocessor/init-aarch64.c b/clang/test/Preprocessor/init-aarch64.c index 460778f39d003..09e3fc926a309 100644 --- a/clang/test/Preprocessor/init-aarch64.c +++ b/clang/test/Preprocessor/init-aarch64.c @@ -32,6 +32,7 @@ // AARCH64-NEXT: #define __ARM_FP16_FORMAT_IEEE 1 // AARCH64-NEXT: #define __ARM_NEON_SVE_BRIDGE 1 // AARCH64-NEXT: #define __ARM_PCS_AAPCS64 1 +// AARCH64-NEXT: #define __ARM_PREFETCH_RANGE 1 // AARCH64-NEXT: #define __ARM_SIZEOF_MINIMAL_ENUM 4 // AARCH64-NEXT: #define __ARM_SIZEOF_WCHAR_T 4 // AARCH64-NEXT: #define __ARM_STATE_ZA 1 diff --git a/clang/test/Sema/builtins-arm64.c b/clang/test/Sema/builtins-arm64.c index 441f91dc4e194..5f155ddfa4d0a 100644 --- a/clang/test/Sema/builtins-arm64.c +++ b/clang/test/Sema/builtins-arm64.c @@ -31,14 +31,16 @@ void test_prefetch(void) { } void test_range_prefetch(void) { - __builtin_arm_range_prefetch(0, 2, 0, 0, 0, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}} - __builtin_arm_range_prefetch(0, 0, 2, 0, 0, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}} 
- __builtin_arm_range_prefetch(0, 0, 0, 16, 0, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}} - __builtin_arm_range_prefetch(0, 0, 0, 0, -2049, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}} - __builtin_arm_range_prefetch(0, 0, 0, 0, 2041, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}} - __builtin_arm_range_prefetch(0, 0, 0, 0, 0, 65537, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}} - __builtin_arm_range_prefetch(0, 0, 0, 0, 0, 0, -2049); // expected-error-re {{argument value {{.*}} is outside the valid range}} - __builtin_arm_range_prefetch(0, 0, 0, 0, 0, 0, 2041); // expected-error-re {{argument value {{.*}} is outside the valid range}} + __builtin_arm_range_prefetch(0, 2, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}} + __builtin_arm_range_prefetch(0, 0, 2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}} + + __builtin_arm_range_prefetch_x(0, 2, 0, 0, 0, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}} + __builtin_arm_range_prefetch_x(0, 0, 2, 0, 0, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}} + __builtin_arm_range_prefetch_x(0, 0, 0, -2049, 0, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}} + __builtin_arm_range_prefetch_x(0, 0, 0, 2041, 0, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}} + __builtin_arm_range_prefetch_x(0, 0, 0, 0, 65537, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}} + __builtin_arm_range_prefetch_x(0, 0, 0, 0, 0, -2049, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}} + __builtin_arm_range_prefetch_x(0, 0, 0, 0, 0, 2041, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}} } void test_trap(short s, unsigned short us) { diff --git 
a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 43a7f10ce2618..cce5b1cbab306 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -76,12 +76,17 @@ def int_aarch64_prefetch : Intrinsic<[], ]>, ClangBuiltin<"__builtin_arm_prefetch">; -def int_aarch64_range_prefetch : Intrinsic<[], +def int_aarch64_range_prefetch_imm : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], + llvm_i32_ty, llvm_i64_ty], [IntrInaccessibleMemOrArgMemOnly, IntrWillReturn, ReadOnly<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, - ImmArg<ArgIndex<5>>, ImmArg<ArgIndex<6>>]>, + ImmArg<ArgIndex<5>>, ImmArg<ArgIndex<6>>]>; + +def int_aarch64_range_prefetch_reg : Intrinsic<[], + [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], + [IntrInaccessibleMemOrArgMemOnly, IntrWillReturn, ReadOnly<ArgIndex<0>>, + ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>, ClangBuiltin<"__builtin_arm_range_prefetch">; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index ae7da03f82025..1347c5c702e08 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -6701,26 +6701,34 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { "isdata argument to llvm.aarch64.prefetch must be 0 or 1", Call); break; } - case Intrinsic::aarch64_range_prefetch: { + case Intrinsic::aarch64_range_prefetch_reg: { Check(cast<ConstantInt>(Call.getArgOperand(1))->getZExtValue() < 2, - "write argument to llvm.aarch64.range.prefetch must be 0 or 1", Call); + "write argument to llvm.aarch64.range.prefetch.reg must be 0 or 1", + Call); Check(cast<ConstantInt>(Call.getArgOperand(2))->getZExtValue() < 2, - "stream argument to llvm.aarch64.range.prefetch must be 0 or 1", + "stream argument to 
llvm.aarch64.range.prefetch.reg must be 0 or 1", Call); - Check(cast<ConstantInt>(Call.getArgOperand(3))->getZExtValue() < 16, - "reuse distance argument to llvm.aarch64.range.prefetch must be < 16", + break; + } + case Intrinsic::aarch64_range_prefetch_imm: { + Check(cast<ConstantInt>(Call.getArgOperand(1))->getZExtValue() < 2, + "write argument to llvm.aarch64.range.prefetch.imm must be 0 or 1", Call); - int Stride = cast<ConstantInt>(Call.getArgOperand(4))->getZExtValue(); - Check(Stride > -2049 && Stride < 2041, - "stride argument to llvm.aarch64.range.prefetch must be -2048 - 2040", + Check(cast<ConstantInt>(Call.getArgOperand(2))->getZExtValue() < 2, + "stream argument to llvm.aarch64.range.prefetch.imm must be 0 or 1", Call); - int Count = cast<ConstantInt>(Call.getArgOperand(5))->getZExtValue(); + int Length = cast<ConstantInt>(Call.getArgOperand(3))->getZExtValue(); + Check(Length > -2049 && Length < 2041, + "length argument to llvm.aarch64.range.prefetch.imm must be -2048 - " + "2040", + Call); + int Count = cast<ConstantInt>(Call.getArgOperand(4))->getZExtValue(); Check(Count > 0 && Count < 65537, - "count argument to llvm.aarch64.range.prefetch must be < 65537", + "count argument to llvm.aarch64.range.prefetch.imm must be < 65537", Call); - int Length = cast<ConstantInt>(Call.getArgOperand(6))->getZExtValue(); - Check(Length > -2049 && Length < 2041, - "length argument to llvm.aarch64.range.prefetch must be -2048 - " + int Stride = cast<ConstantInt>(Call.getArgOperand(5))->getZExtValue(); + Check(Stride > -2049 && Stride < 2041, + "stride argument to llvm.aarch64.range.prefetch.imm must be -2048 - " "2040", Call); break; diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td index e643bdf6fea74..066724bea92c9 100644 --- a/llvm/lib/Target/AArch64/AArch64Features.td +++ b/llvm/lib/Target/AArch64/AArch64Features.td @@ -101,9 +101,6 @@ def FeaturePerfMon : ExtensionWithMArch<"perfmon", "PerfMon", "FEAT_PMUv3", 
def FeatureSpecRestrict : Extension<"specrestrict", "SpecRestrict", "FEAT_CSV2_2", "Enable architectural speculation restriction">; -def FeatureRPRFM : ExtensionWithMArch<"rprfm", "RPRFM", "FEAT_RPRFM", - "Enable Armv8.0-A Range Prefetch Memory instruction">; - //===----------------------------------------------------------------------===// // Armv8.1 Architecture Extensions //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index b7813695facfa..58f5bbfe2cbb3 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -6161,7 +6161,7 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_VOID(SDValue Op, return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Chain, DAG.getTargetConstant(PrfOp, DL, MVT::i32), Addr); } - case Intrinsic::aarch64_range_prefetch: { + case Intrinsic::aarch64_range_prefetch_imm: { SDValue Chain = Op.getOperand(0); SDValue Addr = Op.getOperand(2); @@ -6169,10 +6169,25 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_VOID(SDValue Op, unsigned IsStream = Op.getConstantOperandVal(4); unsigned PrfOp = (IsStream << 2) | IsWrite; - uint64_t Distance = Op.getConstantOperandVal(5); - int64_t Stride = Op.getConstantOperandVal(6); - uint64_t Count = Op.getConstantOperandVal(7) - 1; - int64_t Length = Op.getConstantOperandVal(8); + int64_t Length = Op.getConstantOperandVal(5); + uint64_t Count = Op.getConstantOperandVal(6) - 1; + int64_t Stride = Op.getConstantOperandVal(7); + + // Map ReuseDistance given in bytes to four bits representing decreasing + // powers of two in the range 512MiB (0b0001) to 32KiB (0b1111). Values + // are rounded up to the nearest power of 2, starting at 32KiB. Any value + // over the maximum is represented by 0 (distance not known). 
+ uint64_t Distance = Op.getConstantOperandVal(8); + if (Distance > 0) { + Distance = llvm::Log2_32_Ceil(Distance); + if (Distance < 15) + Distance = 15; + else if (Distance > 29) + Distance = 0; + else + Distance = 30 - Distance; + } + uint64_t Mask22 = (1ULL << 22) - 1; uint64_t Mask16 = (1ULL << 16) - 1; uint64_t Metadata = (Distance << 60) | ((Stride & Mask22) << 38) | @@ -6182,6 +6197,19 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_VOID(SDValue Op, DAG.getTargetConstant(PrfOp, DL, MVT::i32), Addr, DAG.getConstant(Metadata, DL, MVT::i64)); } + case Intrinsic::aarch64_range_prefetch_reg: { + SDValue Chain = Op.getOperand(0); + SDValue Addr = Op.getOperand(2); + + unsigned IsWrite = Op.getConstantOperandVal(3); + unsigned IsStream = Op.getConstantOperandVal(4); + unsigned PrfOp = (IsStream << 2) | IsWrite; + + SDValue Metadata = Op.getOperand(5); + return DAG.getNode(AArch64ISD::RANGE_PREFETCH, DL, MVT::Other, Chain, + DAG.getTargetConstant(PrfOp, DL, MVT::i32), Addr, + Metadata); + } case Intrinsic::aarch64_sme_str: case Intrinsic::aarch64_sme_ldr: { return LowerSMELdrStr(Op, DAG, IntNo == Intrinsic::aarch64_sme_ldr); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 0007ddba3d941..710beb11f4dcc 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -411,8 +411,6 @@ def HasS1POE2 : Predicate<"Subtarget->hasS1POE2()">, AssemblerPredicateWithAll<(all_of FeatureS1POE2), "poe2">; def HasTEV : Predicate<"Subtarget->hasTEV()">, AssemblerPredicateWithAll<(all_of FeatureTEV), "tev">; -def HasRPRFM : Predicate<"Subtarget->hasRPRFM()">, - AssemblerPredicateWithAll<(all_of FeatureRPRFM), "rprfm">; def IsLE : Predicate<"Subtarget->isLittleEndian()">; def IsBE : Predicate<"!Subtarget->isLittleEndian()">; def IsWindows : Predicate<"Subtarget->isTargetWindows()">; diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp 
b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 09075d7fac90a..433cb0387c470 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -3954,7 +3954,6 @@ static const struct Extension { {"poe2", {AArch64::FeatureS1POE2}}, {"tev", {AArch64::FeatureTEV}}, {"btie", {AArch64::FeatureBTIE}}, - {"rprfm", {AArch64::FeatureRPRFM}}, }; static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) { diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 9ed8ed5c53e2f..56b4a4552e874 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -1749,17 +1749,32 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, MI.eraseFromParent(); return true; } - case Intrinsic::aarch64_range_prefetch: { + case Intrinsic::aarch64_range_prefetch_imm: { auto &AddrVal = MI.getOperand(1); int64_t IsWrite = MI.getOperand(2).getImm(); int64_t IsStream = MI.getOperand(3).getImm(); unsigned PrfOp = (IsStream << 2) | IsWrite; - int64_t Distance = MI.getOperand(4).getImm(); - int64_t Stride = MI.getOperand(5).getImm(); - int64_t Count = MI.getOperand(6).getImm() - 1; - int64_t Length = MI.getOperand(7).getImm(); + int64_t Length = MI.getOperand(4).getImm(); + int64_t Count = MI.getOperand(5).getImm() - 1; + int64_t Stride = MI.getOperand(6).getImm(); + + // Map ReuseDistance given in bytes to four bits representing decreasing + // powers of two in the range 512MiB (0b0001) to 32KiB (0b1111). Values + // are rounded up to the nearest power of 2, starting at 32KiB. Any value + // over the maximum is represented by 0 (distance not known). 
+ uint64_t Distance = MI.getOperand(7).getImm(); + if (Distance > 0) { + Distance = llvm::Log2_32_Ceil(Distance); + if (Distance < 15) + Distance = 15; + else if (Distance > 29) + Distance = 0; + else + Distance = 30 - Distance; + } + uint64_t Mask22 = (1ULL << 22) - 1; uint64_t Mask16 = (1ULL << 16) - 1; uint64_t Metadata = (Distance << 60) | ((Stride & Mask22) << 38) | @@ -1774,6 +1789,20 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, MI.eraseFromParent(); return true; } + case Intrinsic::aarch64_range_prefetch_reg: { + auto &AddrVal = MI.getOperand(1); + + int64_t IsWrite = MI.getOperand(2).getImm(); + int64_t IsStream = MI.getOperand(3).getImm(); + unsigned PrfOp = (IsStream << 2) | IsWrite; + + MIB.buildInstr(AArch64::G_AARCH64_RANGE_PREFETCH) + .addImm(PrfOp) + .add(AddrVal) + .addUse(MI.getOperand(4).getReg()); // Metadata + MI.eraseFromParent(); + return true; + } case Intrinsic::aarch64_neon_uaddv: case Intrinsic::aarch64_neon_saddv: case Intrinsic::aarch64_neon_umaxv: diff --git a/llvm/test/CodeGen/AArch64/range-prefetch.ll b/llvm/test/CodeGen/AArch64/range-prefetch.ll index 973a4e8684c48..77023661bca72 100644 --- a/llvm/test/CodeGen/AArch64/range-prefetch.ll +++ b/llvm/test/CodeGen/AArch64/range-prefetch.ll @@ -1,28 +1,80 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 ; RUN: llc -mtriple=aarch64 -mattr=+v8.9a --global-isel=0 < %s | FileCheck %s ; RUN: llc -mtriple=aarch64 -mattr=+v8.9a --global-isel=1 --global-isel-abort=1 < %s | FileCheck %s -; RUN: llc -mtriple=aarch64 -mattr=+v8.9a -mattr=+rprfm --global-isel=0 < %s | FileCheck %s -; RUN: llc -mtriple=aarch64 -mattr=+v8.9a -mattr=+rprfm --global-isel=1 --global-isel-abort=1 < %s | FileCheck %s -define void @test(ptr %a, ptr %b) { -; CHECK-LABEL: test: -; CHECK: mov x8, #-562675075514369 -; CHECK-NEXT: mov x9, #4192256 -; CHECK-NEXT: movk x8, #2040 -; CHECK-NEXT: orr x9, x9, #0x1fe0000000000 -; CHECK-NEXT: movk x8, 
#65472, lsl #16 +define void @range_prefetch_metadata_val(ptr %a, ptr %b, i64 %metadata) { +; CHECK-LABEL: range_prefetch_metadata_val: +; CHECK: // %bb.0: +; CHECK-NEXT: rprfm pldkeep, x2, [x0] +; CHECK-NEXT: rprfm pstkeep, x2, [x0] +; CHECK-NEXT: rprfm pldstrm, x2, [x0] +; CHECK-NEXT: rprfm pststrm, x2, [x0] +; CHECK-NEXT: ret -; CHECK-NEXT: rprfm pldkeep, x8, [x0] - call void @llvm.aarch64.range.prefetch(ptr %a, i32 0, i32 0, i32 15, i32 -2048, i32 65536, i32 2040) + call void @llvm.aarch64.range.prefetch.reg(ptr %a, i32 0, i32 0, i64 %metadata) + call void @llvm.aarch64.range.prefetch.reg(ptr %a, i32 1, i32 0, i64 %metadata) + call void @llvm.aarch64.range.prefetch.reg(ptr %a, i32 0, i32 1, i64 %metadata) + call void @llvm.aarch64.range.prefetch.reg(ptr %a, i32 1, i32 1, i64 %metadata) -; CHECK-NEXT: rprfm pstkeep, x8, [x1] - call void @llvm.aarch64.range.prefetch(ptr %b, i32 1, i32 0, i32 15, i32 -2048, i32 65536, i32 2040) + ret void +} + +define void @range_prefetch_accesses(ptr %a) { +; CHECK-LABEL: range_prefetch_accesses: +; CHECK: // %bb.0: +; CHECK-NEXT: rprfm pldkeep, xzr, [x0] +; CHECK-NEXT: rprfm pstkeep, xzr, [x0] +; CHECK-NEXT: rprfm pldstrm, xzr, [x0] +; CHECK-NEXT: rprfm pststrm, xzr, [x0] +; CHECK-NEXT: ret + + call void @llvm.aarch64.range.prefetch.imm(ptr %a, i32 0, i32 0, i32 0, i32 1, i32 0, i64 0) + call void @llvm.aarch64.range.prefetch.imm(ptr %a, i32 1, i32 0, i32 0, i32 1, i32 0, i64 0) + call void @llvm.aarch64.range.prefetch.imm(ptr %a, i32 0, i32 1, i32 0, i32 1, i32 0, i64 0) + call void @llvm.aarch64.range.prefetch.imm(ptr %a, i32 1, i32 1, i32 0, i32 1, i32 0, i64 0) + ret void +} + +define void @range_prefetch_metatdata_distance_rounding(ptr %a) { +; CHECK-LABEL: range_prefetch_metatdata_distance_rounding: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #-1152921504606846976 // =0xf000000000000000 +; CHECK-NEXT: mov x9, #-2305843009213693952 // =0xe000000000000000 +; CHECK-NEXT: mov x10, #-6917529027641081856 // =0xa000000000000000 +; 
CHECK-NEXT: rprfm pldkeep, x8, [x0] +; CHECK-NEXT: rprfm pldkeep, x9, [x0] +; CHECK-NEXT: rprfm pldkeep, x10, [x0] +; CHECK-NEXT: rprfm pldkeep, xzr, [x0] +; CHECK-NEXT: ret -; CHECK-NEXT: rprfm pldstrm, x9, [x0] - call void @llvm.aarch64.range.prefetch(ptr %a, i32 0, i32 1, i32 0, i32 2040, i32 1, i32 -2048) + ; Distance less than minimum, round up to first power of two (1111) + call void @llvm.aarch64.range.prefetch.imm(ptr %a, i32 0, i32 0, i32 0, i32 1, i32 0, i64 1) + + ; Distance 1 over minimum, round up to next power of 2 (1110) + call void @llvm.aarch64.range.prefetch.imm(ptr %a, i32 0, i32 0, i32 0, i32 1, i32 0, i64 32769) + + ; Distance is a power of two in range (1010) + call void @llvm.aarch64.range.prefetch.imm(ptr %a, i32 0, i32 0, i32 0, i32 1, i32 0, i64 1048576) + + ; Distance is out of range, set to 0 (0000) + call void @llvm.aarch64.range.prefetch.imm(ptr %a, i32 0, i32 0, i32 0, i32 1, i32 0, i64 536870913) + + ret void +} -; CHECK-NEXT: rprfm pststrm, x9, [x1] - call void @llvm.aarch64.range.prefetch(ptr %b, i32 1, i32 1, i32 0, i32 2040, i32 1, i32 -2048) +define void @range_prefetch_metatdata(ptr %a) { +; CHECK-LABEL: range_prefetch_metatdata: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #274869518336 // =0x3fff800000 +; CHECK-NEXT: mov x9, #4192256 // =0x3ff800 +; CHECK-NEXT: movk x8, #2040 +; CHECK-NEXT: orr x9, x9, #0x1fe0000000000 +; CHECK-NEXT: movk x8, #4094, lsl #48 +; CHECK-NEXT: rprfm pststrm, x8, [x0] +; CHECK-NEXT: rprfm pststrm, x9, [x0] +; CHECK-NEXT: ret + call void @llvm.aarch64.range.prefetch.imm(ptr %a, i32 1, i32 1, i32 2040, i32 65535, i32 -2048, i64 0) + call void @llvm.aarch64.range.prefetch.imm(ptr %a, i32 1, i32 1, i32 -2048, i32 1, i32 2040, i64 0) ret void } diff --git a/llvm/test/Verifier/AArch64/intrinsic-immarg.ll b/llvm/test/Verifier/AArch64/intrinsic-immarg.ll index cbaf285efc156..25ccbc448457d 100644 --- a/llvm/test/Verifier/AArch64/intrinsic-immarg.ll +++ b/llvm/test/Verifier/AArch64/intrinsic-immarg.ll @@ -1,41
+1,49 @@ ; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s -define void @range_prefetch(ptr %src) { - ; CHECK: write argument to llvm.aarch64.range.prefetch must be 0 or 1 - ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch(ptr %src, i32 2, i32 0, i32 0, i32 0, i32 1, i32 0) - call void @llvm.aarch64.range.prefetch(ptr %src, i32 2, i32 0, i32 0, i32 0, i32 1, i32 0) - - ; CHECK-NEXT: stream argument to llvm.aarch64.range.prefetch must be 0 or 1 - ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 2, i32 0, i32 0, i32 1, i32 0) - call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 2, i32 0, i32 0, i32 1, i32 0) - - ; CHECK-NEXT: reuse distance argument to llvm.aarch64.range.prefetch must be < 16 - ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 16, i32 0, i32 1, i32 0) - call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 16, i32 0, i32 1, i32 0) - - ; CHECK-NEXT: stride argument to llvm.aarch64.range.prefetch must be -2048 - 2040 - ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 0, i32 -2049, i32 1, i32 0) - call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 0, i32 -2049, i32 1, i32 0) - - ; CHECK-NEXT: stride argument to llvm.aarch64.range.prefetch must be -2048 - 2040 - ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 0, i32 2041, i32 1, i32 0) - call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 0, i32 2041, i32 1, i32 0) - - ; CHECK-NEXT: count argument to llvm.aarch64.range.prefetch must be < 65537 - ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) - call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) - - ; CHECK-NEXT: count argument to llvm.aarch64.range.prefetch must be < 65537 - ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 0, i32 0, i32 65537, i32 
0) - call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 0, i32 0, i32 65537, i32 0) - - ; CHECK-NEXT: length argument to llvm.aarch64.range.prefetch must be -2048 - 2040 - ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 0, i32 0, i32 1, i32 -2049) - call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 0, i32 0, i32 1, i32 -2049) - - ; CHECK-NEXT: length argument to llvm.aarch64.range.prefetch must be -2048 - 2040 - ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 0, i32 0, i32 1, i32 2041) - call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 0, i32 0, i32 0, i32 1, i32 2041) +define void @range_prefetch_reg(ptr %src, i64 %metadata) { + ; CHECK: write argument to llvm.aarch64.range.prefetch.reg must be 0 or 1 + ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch.reg(ptr %src, i32 2, i32 0, i64 %metadata) + call void @llvm.aarch64.range.prefetch.reg(ptr %src, i32 2, i32 0, i64 %metadata) + + ; CHECK-NEXT: stream argument to llvm.aarch64.range.prefetch.reg must be 0 or 1 + ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch.reg(ptr %src, i32 0, i32 2, i64 %metadata) + call void @llvm.aarch64.range.prefetch.reg(ptr %src, i32 0, i32 2, i64 %metadata) + + ret void +} + +define void @range_prefetch_imm(ptr %src) { + ; CHECK: write argument to llvm.aarch64.range.prefetch.imm must be 0 or 1 + ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch.imm(ptr %src, i32 2, i32 0, i32 0, i32 1, i32 0, i64 0) + call void @llvm.aarch64.range.prefetch.imm(ptr %src, i32 2, i32 0, i32 0, i32 1, i32 0, i64 0) + + ; CHECK-NEXT: stream argument to llvm.aarch64.range.prefetch.imm must be 0 or 1 + ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch.imm(ptr %src, i32 0, i32 2, i32 0, i32 1, i32 0, i64 0) + call void @llvm.aarch64.range.prefetch.imm(ptr %src, i32 0, i32 2, i32 0, i32 1, i32 0, i64 0) + + ; CHECK-NEXT: length argument to llvm.aarch64.range.prefetch.imm must be -2048 - 2040 
+ ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch.imm(ptr %src, i32 0, i32 0, i32 -2049, i32 1, i32 0, i64 0) + call void @llvm.aarch64.range.prefetch.imm(ptr %src, i32 0, i32 0, i32 -2049, i32 1, i32 0, i64 0) + + ; CHECK-NEXT: length argument to llvm.aarch64.range.prefetch.imm must be -2048 - 2040 + ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch.imm(ptr %src, i32 0, i32 0, i32 2041, i32 1, i32 0, i64 0) + call void @llvm.aarch64.range.prefetch.imm(ptr %src, i32 0, i32 0, i32 2041, i32 1, i32 0, i64 0) + + ; CHECK-NEXT: count argument to llvm.aarch64.range.prefetch.imm must be < 65537 + ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch.imm(ptr %src, i32 0, i32 0, i32 0, i32 0, i32 0, i64 0) + call void @llvm.aarch64.range.prefetch.imm(ptr %src, i32 0, i32 0, i32 0, i32 0, i32 0, i64 0) + + ; CHECK-NEXT: count argument to llvm.aarch64.range.prefetch.imm must be < 65537 + ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch.imm(ptr %src, i32 0, i32 0, i32 0, i32 65537, i32 0, i64 0) + call void @llvm.aarch64.range.prefetch.imm(ptr %src, i32 0, i32 0, i32 0, i32 65537, i32 0, i64 0) + + ; CHECK-NEXT: stride argument to llvm.aarch64.range.prefetch.imm must be -2048 - 2040 + ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch.imm(ptr %src, i32 0, i32 0, i32 0, i32 1, i32 -2049, i64 0) + call void @llvm.aarch64.range.prefetch.imm(ptr %src, i32 0, i32 0, i32 0, i32 1, i32 -2049, i64 0) + + ; CHECK-NEXT: stride argument to llvm.aarch64.range.prefetch.imm must be -2048 - 2040 + ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch.imm(ptr %src, i32 0, i32 0, i32 0, i32 1, i32 2041, i64 0) + call void @llvm.aarch64.range.prefetch.imm(ptr %src, i32 0, i32 0, i32 0, i32 1, i32 2041, i64 0) ret void } _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
