Author: Kerry McLaughlin
Date: 2026-01-12T15:53:17Z
New Revision: 04e5bc7dfbd0a5b4c00470ccea4cc43dcfc2d38a

URL: 
https://github.com/llvm/llvm-project/commit/04e5bc7dfbd0a5b4c00470ccea4cc43dcfc2d38a
DIFF: 
https://github.com/llvm/llvm-project/commit/04e5bc7dfbd0a5b4c00470ccea4cc43dcfc2d38a.diff

LOG: [AArch64] Add support for range prefetch intrinsic (#170490)

This patch adds support in Clang for the RPRFM instruction, by adding
the following intrinsics:

```
void __pldx_range(unsigned int access_kind, unsigned int retention_policy,
                  signed int length, unsigned int count, signed int stride,
                  size_t reuse_distance, void const *addr);

void __pld_range(unsigned int access_kind, unsigned int retention_policy,
                 uint64_t metadata, void const *addr);
```

The `__ARM_PREFETCH_RANGE` macro can be used to test whether these
intrinsics are implemented. On targets where the RPRFM instruction is not
available, it executes as a NOP.

This implements the following ACLE proposal:
https://github.com/ARM-software/acle/pull/423

Added: 
    llvm/test/CodeGen/AArch64/range-prefetch.ll
    llvm/test/Verifier/AArch64/intrinsic-immarg.ll

Modified: 
    clang/include/clang/Basic/BuiltinsAArch64.def
    clang/lib/Basic/Targets/AArch64.cpp
    clang/lib/CodeGen/TargetBuiltins/ARM.cpp
    clang/lib/Headers/arm_acle.h
    clang/lib/Sema/SemaARM.cpp
    clang/test/CodeGen/arm_acle.c
    clang/test/CodeGen/builtins-arm64.c
    clang/test/Preprocessor/aarch64-target-features.c
    clang/test/Preprocessor/init-aarch64.c
    clang/test/Sema/builtins-arm64.c
    llvm/include/llvm/IR/IntrinsicsAArch64.td
    llvm/lib/IR/Verifier.cpp
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/lib/Target/AArch64/AArch64InstrGISel.td
    llvm/lib/Target/AArch64/AArch64InstrInfo.td
    llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/BuiltinsAArch64.def 
b/clang/include/clang/Basic/BuiltinsAArch64.def
index adb6c941e852a..5ae5affb51fde 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -96,6 +96,10 @@ TARGET_BUILTIN(__builtin_arm_jcvt, "Zid", "nc", "v8.3a")
 // Prefetch
 BUILTIN(__builtin_arm_prefetch, "vvC*UiUiUiUi", "nc")
 
+// Range Prefetch
+BUILTIN(__builtin_arm_range_prefetch_x, "vvC*UiUiiUiiz", "n")
+BUILTIN(__builtin_arm_range_prefetch, "vvC*UiUiWUi", "n")
+
 // System Registers
 BUILTIN(__builtin_arm_rsr, "UicC*", "nc")
 BUILTIN(__builtin_arm_rsr64, "WUicC*", "nc")

diff  --git a/clang/lib/Basic/Targets/AArch64.cpp 
b/clang/lib/Basic/Targets/AArch64.cpp
index ecd441be364c2..fe407e9fc1789 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -477,6 +477,9 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions 
&Opts,
 
   Builder.defineMacro("__ARM_SIZEOF_MINIMAL_ENUM", Opts.ShortEnums ? "1" : 
"4");
 
+  // Clang supports range prefetch intrinsics
+  Builder.defineMacro("__ARM_PREFETCH_RANGE", "1");
+
   if (FPU & NeonMode) {
     Builder.defineMacro("__ARM_NEON", "1");
     // 64-bit NEON supports half, single and double precision operations.

diff  --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp 
b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 05dff01a71b9a..2d7128bf95df2 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -2660,6 +2660,56 @@ static Value *EmitSpecialRegisterBuiltin(CodeGenFunction 
&CGF,
   return Builder.CreateCall(F, { Metadata, ArgValue });
 }
 
+static Value *EmitRangePrefetchBuiltin(CodeGenFunction &CGF, unsigned 
BuiltinID,
+                                       const CallExpr *E) {
+  CodeGen::CGBuilderTy &Builder = CGF.Builder;
+  CodeGen::CodeGenModule &CGM = CGF.CGM;
+  SmallVector<llvm::Value *, 4> Ops;
+
+  auto getIntArg = [&](unsigned ArgNo) {
+    Expr::EvalResult Result;
+    if (!E->getArg(ArgNo)->EvaluateAsInt(Result, CGM.getContext()))
+      llvm_unreachable("Expected constant argument to range prefetch.");
+    return Result.Val.getInt().getExtValue();
+  };
+
+  Ops.push_back(CGF.EmitScalarExpr(E->getArg(0))); /*Addr*/
+  Ops.push_back(CGF.EmitScalarExpr(E->getArg(1))); /*Access Kind*/
+  Ops.push_back(CGF.EmitScalarExpr(E->getArg(2))); /*Policy*/
+
+  if (BuiltinID == clang::AArch64::BI__builtin_arm_range_prefetch_x) {
+    auto Length = getIntArg(3);
+    auto Count = getIntArg(4) - 1;
+    auto Stride = getIntArg(5);
+    auto Distance = getIntArg(6);
+
+    // Map ReuseDistance given in bytes to four bits representing decreasing
+    // powers of two in the range 512MiB (0b0001) to 32KiB (0b1111). Values
+    // are rounded up to the nearest power of 2, starting at 32KiB. Any value
+    // over the maximum is represented by 0 (distance not known).
+    if (Distance > 0) {
+      Distance = llvm::Log2_32_Ceil(Distance);
+      if (Distance < 15)
+        Distance = 15;
+      else if (Distance > 29)
+        Distance = 0;
+      else
+        Distance = 30 - Distance;
+    }
+
+    uint64_t Mask22 = (1ULL << 22) - 1;
+    uint64_t Mask16 = (1ULL << 16) - 1;
+    uint64_t Metadata = (Distance << 60) | ((Stride & Mask22) << 38) |
+                        ((Count & Mask16) << 22) | (Length & Mask22);
+
+    Ops.push_back(llvm::ConstantInt::get(Builder.getInt64Ty(), Metadata));
+  } else
+    Ops.push_back(CGF.EmitScalarExpr(E->getArg(3)));
+
+  return 
Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_range_prefetch),
+                            Ops);
+}
+
 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
 /// argument that specifies the vector type.
 static bool HasExtraNeonArgument(unsigned BuiltinID) {
@@ -5447,6 +5497,10 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned 
BuiltinID,
         CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, 
Size});
   }
 
+  if (BuiltinID == AArch64::BI__builtin_arm_range_prefetch ||
+      BuiltinID == AArch64::BI__builtin_arm_range_prefetch_x)
+    return EmitRangePrefetchBuiltin(*this, BuiltinID, E);
+
   // Memory Tagging Extensions (MTE) Intrinsics
   Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
   switch (BuiltinID) {

diff  --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index 97f63e8ecf71f..622e8f3d6aa7b 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -98,6 +98,12 @@ __swp(uint32_t __x, volatile uint32_t *__p) {
 #else
 #define __pldx(access_kind, cache_level, retention_policy, addr) \
   __builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1)
+#define __pldx_range(access_kind, retention_policy, length, count, stride,     
\
+                     reuse_distance, addr)                                     
\
+  __builtin_arm_range_prefetch_x(addr, access_kind, retention_policy, length,  
\
+                                 count, stride, reuse_distance)
+#define __pld_range(access_kind, retention_policy, metadata, addr)             
\
+  __builtin_arm_range_prefetch(addr, access_kind, retention_policy, metadata)
 #endif
 
 /* 7.6.2 Instruction prefetch */

diff  --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index 81504b74c5e45..53e8c002a1962 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -1122,6 +1122,19 @@ bool SemaARM::CheckAArch64BuiltinFunctionCall(const 
TargetInfo &TI,
            SemaRef.BuiltinConstantArgRange(TheCall, 4, 0, 1);
   }
 
+  if (BuiltinID == AArch64::BI__builtin_arm_range_prefetch_x) {
+    return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 1) ||
+           SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 1) ||
+           SemaRef.BuiltinConstantArgRange(TheCall, 3, -2097152, 2097151) ||
+           SemaRef.BuiltinConstantArgRange(TheCall, 4, 1, 65536) ||
+           SemaRef.BuiltinConstantArgRange(TheCall, 5, -2097152, 2097151);
+  }
+
+  if (BuiltinID == AArch64::BI__builtin_arm_range_prefetch) {
+    return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 1) ||
+           SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 1);
+  }
+
   if (BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
       BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
       BuiltinID == AArch64::BI__builtin_arm_rsr128 ||

diff  --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c
index 0f539cba5c758..2606ad6dd2ec1 100644
--- a/clang/test/CodeGen/arm_acle.c
+++ b/clang/test/CodeGen/arm_acle.c
@@ -164,6 +164,28 @@ void test_pld() {
   __pld(0);
 }
 
+#if defined(__ARM_64BIT_STATE) && defined(__ARM_PREFETCH_RANGE)
+
+// AArch64-LABEL: @test_pld_range(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    call void @llvm.aarch64.range.prefetch(ptr null, i32 0, 
i32 1, i64 [[MD:%.*]])
+// AArch64-NEXT:    ret void
+//
+void test_pld_range(uint64_t md) {
+  __pld_range(0, 1, md, 0);
+}
+
+// AArch64-LABEL: @test_pldx_range(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    call void @llvm.aarch64.range.prefetch(ptr null, i32 0, 
i32 1, i64 -576460477427613697)
+// AArch64-NEXT:    ret void
+//
+void test_pldx_range() {
+  __pldx_range(0, 1, 2097151, 65536, -2097152, 15, 0);
+}
+
+#endif
+
 // AArch32-LABEL: @test_pldx(
 // AArch32-NEXT:  entry:
 // AArch32-NEXT:    call void @llvm.prefetch.p0(ptr null, i32 1, i32 3, i32 1)

diff  --git a/clang/test/CodeGen/builtins-arm64.c 
b/clang/test/CodeGen/builtins-arm64.c
index 86c2812434643..c1fd348371f38 100644
--- a/clang/test/CodeGen/builtins-arm64.c
+++ b/clang/test/CodeGen/builtins-arm64.c
@@ -62,6 +62,55 @@ void prefetch(void) {
   // CHECK: call {{.*}} @llvm.aarch64.prefetch(ptr null, i32 0, i32 3, i32 0, 
i32 1)
 }
 
+void range_prefetch(void) {
+  __builtin_arm_range_prefetch(0, 0, 0, 0); // pldkeep
+  // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, 
i64 0)
+
+  __builtin_arm_range_prefetch(0, 0, 1, 0); // pldstrm
+  // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 1, 
i64 0)
+
+  __builtin_arm_range_prefetch(0, 1, 0, 0); // pstkeep
+  // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 1, i32 0, 
i64 0)
+
+  __builtin_arm_range_prefetch(0, 1, 1, 0); // pststrm
+  // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 1, i32 1, 
i64 0)
+}
+
+void range_prefetch_x(void) {
+  __builtin_arm_range_prefetch_x(0, 0, 0, 0, 1, 0, 0); // pldkeep
+  // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, 
i64 0)
+  __builtin_arm_range_prefetch_x(0, 0, 1, 0, 1, 0, 0); // pldstrm
+  // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 1, 
i64 0)
+  __builtin_arm_range_prefetch_x(0, 1, 0, 0, 1, 0, 0); // pstkeep
+  // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 1, i32 0, 
i64 0)
+  __builtin_arm_range_prefetch_x(0, 1, 1, 0, 1, 0, 0); // pststrm
+  // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 1, i32 1, 
i64 0)
+
+  // Lower limits (length, count & stride)
+  __builtin_arm_range_prefetch_x(0, 0, 0, -2097152, 1, -2097152, 0);
+  // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, 
i64 576460752305520640)
+
+  // Upper limits (length, count & stride)
+  __builtin_arm_range_prefetch_x(0, 0, 0, 2097151, 65536, 2097151, 0);
+  // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, 
i64 576460752301326335)
+
+  // Distance less than minimum, round up to first power of two (1111)
+  __builtin_arm_range_prefetch_x(0, 0, 0, 0, 1, 0, 1);
+  // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, 
i64 -1152921504606846976)
+
+  // Distance 1 over minimum, round up to next power of 2 (1110)
+  __builtin_arm_range_prefetch_x(0, 0, 0, 0, 1, 0, 32769);
+  // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, 
i64 -2305843009213693952)
+
+  // Distance is a power of two in range (1010)
+  __builtin_arm_range_prefetch_x(0, 0, 0, 0, 1, 0, 1048576);
+  // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, 
i64 -6917529027641081856)
+
+  // Distance is out of range, set to 0 (0000)
+  __builtin_arm_range_prefetch_x(0, 0, 0, 0, 1, 0, 536870913);
+  // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, 
i64 0)
+}
+
 __attribute__((target("v8.5a")))
 int32_t jcvt(double v) {
   //CHECK-LABEL: @jcvt(

diff  --git a/clang/test/Preprocessor/aarch64-target-features.c 
b/clang/test/Preprocessor/aarch64-target-features.c
index 4dd243e57a63e..137840f6d2864 100644
--- a/clang/test/Preprocessor/aarch64-target-features.c
+++ b/clang/test/Preprocessor/aarch64-target-features.c
@@ -41,6 +41,7 @@
 // CHECK: __ARM_NEON_FP 0xE
 // CHECK: __ARM_NEON_SVE_BRIDGE 1
 // CHECK: __ARM_PCS_AAPCS64 1
+// CHECK: __ARM_PREFETCH_RANGE 1
 // CHECK-NOT: __ARM_PCS 1
 // CHECK-NOT: __ARM_PCS_VFP 1
 // CHECK-NOT: __ARM_SIZEOF_MINIMAL_ENUM 1

diff  --git a/clang/test/Preprocessor/init-aarch64.c 
b/clang/test/Preprocessor/init-aarch64.c
index 460778f39d003..09e3fc926a309 100644
--- a/clang/test/Preprocessor/init-aarch64.c
+++ b/clang/test/Preprocessor/init-aarch64.c
@@ -32,6 +32,7 @@
 // AARCH64-NEXT: #define __ARM_FP16_FORMAT_IEEE 1
 // AARCH64-NEXT: #define __ARM_NEON_SVE_BRIDGE 1
 // AARCH64-NEXT: #define __ARM_PCS_AAPCS64 1
+// AARCH64-NEXT: #define __ARM_PREFETCH_RANGE 1
 // AARCH64-NEXT: #define __ARM_SIZEOF_MINIMAL_ENUM 4
 // AARCH64-NEXT: #define __ARM_SIZEOF_WCHAR_T 4
 // AARCH64-NEXT: #define __ARM_STATE_ZA 1

diff  --git a/clang/test/Sema/builtins-arm64.c 
b/clang/test/Sema/builtins-arm64.c
index f094162b3aadc..41cffd7ebb1a0 100644
--- a/clang/test/Sema/builtins-arm64.c
+++ b/clang/test/Sema/builtins-arm64.c
@@ -30,6 +30,19 @@ void test_prefetch(void) {
   __builtin_arm_prefetch(0, 0, 0, 0, 2); // expected-error-re {{argument value 
{{.*}} is outside the valid range}}
 }
 
+void test_range_prefetch(void) {
+  __builtin_arm_range_prefetch(0, 2, 0, 0); // expected-error-re {{argument 
value {{.*}} is outside the valid range}}
+  __builtin_arm_range_prefetch(0, 0, 2, 0); // expected-error-re {{argument 
value {{.*}} is outside the valid range}}
+
+  __builtin_arm_range_prefetch_x(0, 2, 0, 0, 0, 0, 0); // expected-error-re 
{{argument value {{.*}} is outside the valid range}}
+  __builtin_arm_range_prefetch_x(0, 0, 2, 0, 0, 0, 0); // expected-error-re 
{{argument value {{.*}} is outside the valid range}}
+  __builtin_arm_range_prefetch_x(0, 0, 0, -2097153, 0, 0, 0); // 
expected-error-re {{argument value {{.*}} is outside the valid range}}
+  __builtin_arm_range_prefetch_x(0, 0, 0, 2097152, 0, 0, 0); // 
expected-error-re {{argument value {{.*}} is outside the valid range}}
+  __builtin_arm_range_prefetch_x(0, 0, 0, 0, 65537, 0, 0); // 
expected-error-re {{argument value {{.*}} is outside the valid range}}
+  __builtin_arm_range_prefetch_x(0, 0, 0, 0, 0, -2097153, 0); // 
expected-error-re {{argument value {{.*}} is outside the valid range}}
+  __builtin_arm_range_prefetch_x(0, 0, 0, 0, 0, 2097152, 0); // 
expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
 void test_trap(short s, unsigned short us) {
   __builtin_arm_trap(42);
   __builtin_arm_trap(65535);
@@ -37,4 +50,4 @@ void test_trap(short s, unsigned short us) {
   __builtin_arm_trap(65536); // expected-warning {{implicit conversion from 
'int' to 'unsigned short' changes value from 65536 to 0}}
   __builtin_arm_trap(s); // expected-error {{argument to '__builtin_arm_trap' 
must be a constant integer}}
   __builtin_arm_trap(us); // expected-error {{argument to '__builtin_arm_trap' 
must be a constant integer}}
-}
\ No newline at end of file
+}

diff  --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td 
b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 221eca5b18d01..fd56e0e3f9e7b 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -76,6 +76,12 @@ def int_aarch64_prefetch : Intrinsic<[],
      ]>,
     ClangBuiltin<"__builtin_arm_prefetch">;
 
+def int_aarch64_range_prefetch : Intrinsic<[],
+    [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty],
+    [IntrInaccessibleMemOrArgMemOnly, IntrWillReturn, ReadOnly<ArgIndex<0>>,
+     ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>,
+    ClangBuiltin<"__builtin_arm_range_prefetch">;
+
 
//===----------------------------------------------------------------------===//
 // Data Barrier Instructions
 

diff  --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 0b9a23f217218..bb552861130d2 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -6768,6 +6768,14 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, 
CallBase &Call) {
           "isdata argument to llvm.aarch64.prefetch must be 0 or 1", Call);
     break;
   }
+  case Intrinsic::aarch64_range_prefetch: {
+    Check(cast<ConstantInt>(Call.getArgOperand(1))->getZExtValue() < 2,
+          "write argument to llvm.aarch64.range.prefetch must be 0 or 1", 
Call);
+    Check(cast<ConstantInt>(Call.getArgOperand(2))->getZExtValue() < 2,
+          "stream argument to llvm.aarch64.range.prefetch must be 0 or 1",
+          Call);
+    break;
+  }
   case Intrinsic::callbr_landingpad: {
     const auto *CBR = dyn_cast<CallBrInst>(Call.getOperand(0));
     Check(CBR, "intrinstic requires callbr operand", &Call);

diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp 
b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index ffd9641e9f9df..74ee8ff8ab5f5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6229,6 +6229,19 @@ SDValue 
AArch64TargetLowering::LowerINTRINSIC_VOID(SDValue Op,
     return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Chain,
                        DAG.getTargetConstant(PrfOp, DL, MVT::i32), Addr);
   }
+  case Intrinsic::aarch64_range_prefetch: {
+    SDValue Chain = Op.getOperand(0);
+    SDValue Addr = Op.getOperand(2);
+
+    unsigned IsWrite = Op.getConstantOperandVal(3);
+    unsigned IsStream = Op.getConstantOperandVal(4);
+    unsigned PrfOp = (IsStream << 2) | IsWrite;
+
+    SDValue Metadata = Op.getOperand(5);
+    return DAG.getNode(AArch64ISD::RANGE_PREFETCH, DL, MVT::Other, Chain,
+                       DAG.getTargetConstant(PrfOp, DL, MVT::i32), Addr,
+                       Metadata);
+  }
   case Intrinsic::aarch64_sme_str:
   case Intrinsic::aarch64_sme_ldr: {
     return LowerSMELdrStr(Op, DAG, IntNo == Intrinsic::aarch64_sme_ldr);

diff  --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td 
b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 497306dabaa97..d0c08036e7d41 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -192,6 +192,12 @@ def G_AARCH64_PREFETCH : AArch64GenericInstruction {
   let hasSideEffects = 1;
 }
 
+def G_AARCH64_RANGE_PREFETCH : AArch64GenericInstruction {
+  let OutOperandList = (outs);
+  let InOperandList = (ins type0:$imm, ptype0:$src1, type1:$src2);
+  let hasSideEffects = 1;
+}
+
 def G_UMULL : AArch64GenericInstruction {
   let OutOperandList = (outs type0:$dst);
   let InOperandList = (ins type0:$src1, type0:$src2);
@@ -339,6 +345,7 @@ def : GINodeEquiv<G_SRI, AArch64vsri>;
 def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
 
 def : GINodeEquiv<G_AARCH64_PREFETCH, AArch64Prefetch>;
+def : GINodeEquiv<G_AARCH64_RANGE_PREFETCH, AArch64RangePrefetch>;
 
 def : GINodeEquiv<G_FPTRUNC_ODD, AArch64fcvtxn_n>;
 

diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td 
b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 5a471824ef513..cb8f7c3d70afc 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -538,6 +538,7 @@ def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, 
SDTCisSameAs<0,1>,
                                            SDTCisSameAs<0,3>]>;
 def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;
 def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, 
SDTCisPtrTy<1>]>;
+def SDT_AArch64RANGE_PREFETCH: SDTypeProfile<0, 3, [SDTCisVT<0, i32>, 
SDTCisPtrTy<1>, SDTCisVT<2, i64>]>;
 
 def SDT_AArch64ITOF  : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>;
 
@@ -1054,6 +1055,10 @@ def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", 
SDT_AArch64TCRET,
 def AArch64Prefetch        : SDNode<"AArch64ISD::PREFETCH", 
SDT_AArch64PREFETCH,
                                [SDNPHasChain, SDNPSideEffect]>;
 
+def AArch64RangePrefetch: SDNode<"AArch64ISD::RANGE_PREFETCH",
+                                 SDT_AArch64RANGE_PREFETCH,
+                                 [SDNPHasChain, SDNPSideEffect]>;
+
 // {s|u}int to FP within a FP register.
 def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
 def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;
@@ -11089,6 +11094,9 @@ def RPRFM:
   let DecoderNamespace = "Fallback";
 }
 
+def : Pat<(AArch64RangePrefetch rprfop:$Rt, GPR64sp:$Rn, GPR64:$Rm),
+          (RPRFM rprfop:$Rt, GPR64:$Rm, GPR64sp:$Rn)>;
+
 
//===----------------------------------------------------------------------===//
 // 128-bit Atomics (FEAT_LSE128)
 
//===----------------------------------------------------------------------===//

diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp 
b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 1ae0b99416a29..e067489283b24 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1755,6 +1755,20 @@ bool 
AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
     MI.eraseFromParent();
     return true;
   }
+  case Intrinsic::aarch64_range_prefetch: {
+    auto &AddrVal = MI.getOperand(1);
+
+    int64_t IsWrite = MI.getOperand(2).getImm();
+    int64_t IsStream = MI.getOperand(3).getImm();
+    unsigned PrfOp = (IsStream << 2) | IsWrite;
+
+    MIB.buildInstr(AArch64::G_AARCH64_RANGE_PREFETCH)
+        .addImm(PrfOp)
+        .add(AddrVal)
+        .addUse(MI.getOperand(4).getReg()); // Metadata
+    MI.eraseFromParent();
+    return true;
+  }
   case Intrinsic::aarch64_neon_uaddv:
   case Intrinsic::aarch64_neon_saddv:
   case Intrinsic::aarch64_neon_umaxv:

diff  --git a/llvm/test/CodeGen/AArch64/range-prefetch.ll 
b/llvm/test/CodeGen/AArch64/range-prefetch.ll
new file mode 100644
index 0000000000000..bc01498296cf3
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/range-prefetch.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 6
+; RUN: llc -mtriple=aarch64 -mattr=+v8.9a --global-isel=0 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64 -mattr=+v8.9a --global-isel=1 
--global-isel-abort=1 < %s | FileCheck %s
+
+define void @range_prefetch_metadata_accesses(ptr %a, i64 %metadata) {
+; CHECK-LABEL: range_prefetch_metadata_accesses:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rprfm pldkeep, x1, [x0]
+; CHECK-NEXT:    rprfm pstkeep, x1, [x0]
+; CHECK-NEXT:    rprfm pldstrm, x1, [x0]
+; CHECK-NEXT:    rprfm pststrm, x1, [x0]
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.range.prefetch(ptr %a, i32 0, i32 0, i64 %metadata)
+  call void @llvm.aarch64.range.prefetch(ptr %a, i32 1, i32 0, i64 %metadata)
+  call void @llvm.aarch64.range.prefetch(ptr %a, i32 0, i32 1, i64 %metadata)
+  call void @llvm.aarch64.range.prefetch(ptr %a, i32 1, i32 1, i64 %metadata)
+  ret void
+}

diff  --git a/llvm/test/Verifier/AArch64/intrinsic-immarg.ll 
b/llvm/test/Verifier/AArch64/intrinsic-immarg.ll
new file mode 100644
index 0000000000000..e17c11d66dac4
--- /dev/null
+++ b/llvm/test/Verifier/AArch64/intrinsic-immarg.ll
@@ -0,0 +1,13 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+define void @range_prefetch(ptr %src, i64 %metadata) {
+  ; CHECK: write argument to llvm.aarch64.range.prefetch must be 0 or 1
+  ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch(ptr %src, i32 2, i32 0, 
i64 %metadata)
+  call void @llvm.aarch64.range.prefetch(ptr %src, i32 2, i32 0, i64 %metadata)
+
+  ; CHECK-NEXT: stream argument to llvm.aarch64.range.prefetch must be 0 or 1
+  ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 2, 
i64 %metadata)
+  call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 2, i64 %metadata)
+
+  ret void
+}


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to