https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/161988

Fix the special case intrinsics that can directly reference a physical
register. There's no reason to use this.

>From e1402218d01f9269d181d69a289e80aa68d84c01 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <[email protected]>
Date: Sun, 5 Oct 2025 10:17:18 +0900
Subject: [PATCH] AMDGPU: Account for read/write register intrinsics for AGPR
 usage

Fix the special case intrinsics that can directly reference a physical
register. There's no reason to use this.
---
 llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp   | 17 +++-
 .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp    | 15 +--
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h |  7 ++
 .../AMDGPU/amdgpu-attributor-no-agpr.ll       | 99 +++++++++++++++++--
 4 files changed, 120 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 155b1a6a380dd..e8e39ff015eed 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1313,10 +1313,21 @@ struct AAAMDGPUNoAGPR : public 
StateWrapper<BooleanState, AbstractAttribute> {
         return false;
       }
 
-      // Some intrinsics may use AGPRs, but if we have a choice, we are not
-      // required to use AGPRs.
-      if (Callee->isIntrinsic())
+      switch (Callee->getIntrinsicID()) {
+      case Intrinsic::write_register:
+      case Intrinsic::read_register:
+      case Intrinsic::read_volatile_register: {
+        const MDString *RegName = cast<MDString>(
+            cast<MetadataAsValue>(CB.getArgOperand(0))->getMetadata());
+        auto [Kind, RegIdx, NumRegs] =
+            AMDGPU::parseAsmPhysRegName(RegName->getString());
+        return Kind != 'a';
+      }
+      default:
+        // Some intrinsics may use AGPRs, but if we have a choice, we are not
+        // required to use AGPRs.
         return true;
+      }
 
       // TODO: Handle callsite attributes
       const auto *CalleeInfo = A.getAAFor<AAAMDGPUNoAGPR>(
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp 
b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 20fa1412a778e..8e57498c56f76 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1577,12 +1577,7 @@ static bool isValidRegPrefix(char C) {
   return C == 'v' || C == 's' || C == 'a';
 }
 
-std::tuple<char, unsigned, unsigned>
-parseAsmConstraintPhysReg(StringRef Constraint) {
-  StringRef RegName = Constraint;
-  if (!RegName.consume_front("{") || !RegName.consume_back("}"))
-    return {};
-
+std::tuple<char, unsigned, unsigned> parseAsmPhysRegName(StringRef RegName) {
   char Kind = RegName.front();
   if (!isValidRegPrefix(Kind))
     return {};
@@ -1609,6 +1604,14 @@ parseAsmConstraintPhysReg(StringRef Constraint) {
   return {};
 }
 
+std::tuple<char, unsigned, unsigned>
+parseAsmConstraintPhysReg(StringRef Constraint) {
+  StringRef RegName = Constraint;
+  if (!RegName.consume_front("{") || !RegName.consume_back("}"))
+    return {};
+  return parseAsmPhysRegName(RegName);
+}
+
 std::pair<unsigned, unsigned>
 getIntegerPairAttribute(const Function &F, StringRef Name,
                         std::pair<unsigned, unsigned> Default,
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h 
b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 2b9c063f42a5e..b656414f2b85c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1013,6 +1013,13 @@ bool isReadOnlySegment(const GlobalValue *GV);
 /// target triple \p TT, false otherwise.
 bool shouldEmitConstantsToTextSection(const Triple &TT);
 
+/// Returns a valid charcode or 0 in the first entry if this is a valid 
physical
+/// register name. Followed by the start register number, and the register
+/// width. Does not validate the number of registers exists in the class. 
Unlike
+/// parseAsmConstraintPhysReg, this does not expect the name to be wrapped in
+/// "{}".
+std::tuple<char, unsigned, unsigned> parseAsmPhysRegName(StringRef 
TupleString);
+
 /// Returns a valid charcode or 0 in the first entry if this is a valid 
physical
 /// register constraint. Followed by the start register number, and the 
register
 /// width. Does not validate the number of registers exists in the class.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll 
b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
index f19d563067eb2..7fcd07486d54d 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
@@ -168,7 +168,7 @@ declare void @unknown()
 
 define amdgpu_kernel void @kernel_calls_extern() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    call void @unknown()
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -180,8 +180,8 @@ define amdgpu_kernel void @kernel_calls_extern() {
 
 define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite(
-; CHECK-SAME: ) #[[ATTR1]] {
-; CHECK-NEXT:    call void @unknown() #[[ATTR5:[0-9]+]]
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    call void @unknown() #[[ATTR10:[0-9]+]]
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
@@ -205,7 +205,7 @@ define amdgpu_kernel void @kernel_calls_indirect(ptr 
%indirect) {
 define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr 
%indirect) {
 ; CHECK-LABEL: define amdgpu_kernel void 
@kernel_calls_indirect_marked_callsite(
 ; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    call void [[INDIRECT]]() #[[ATTR5]]
+; CHECK-NEXT:    call void [[INDIRECT]]() #[[ATTR10]]
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
@@ -216,7 +216,7 @@ define amdgpu_kernel void 
@kernel_calls_indirect_marked_callsite(ptr %indirect)
 
 define amdgpu_kernel void @kernel_transitively_uses_agpr_asm() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_transitively_uses_agpr_asm(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    call void @func_uses_asm_physreg_agpr()
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -260,7 +260,7 @@ define amdgpu_kernel void @kernel_calls_empty() {
 
 define amdgpu_kernel void @kernel_calls_non_agpr_and_agpr() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_non_agpr_and_agpr(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    call void @empty()
 ; CHECK-NEXT:    call void @func_uses_asm_physreg_agpr()
 ; CHECK-NEXT:    call void @use_most()
@@ -616,12 +616,93 @@ define amdgpu_kernel void 
@physreg_def_a32___def_vreg_a512_use_vreg_a256() {
   ret void
 }
 
+define amdgpu_kernel void @kernel_uses_write_register_a55() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_a55(
+; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT:    call void @llvm.write_register.i32(metadata !"a55", i32 0)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.write_register.i64(metadata !"a55", i32 0)
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_write_register_v55() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_v55(
+; CHECK-SAME: ) #[[ATTR4:[0-9]+]] {
+; CHECK-NEXT:    call void @llvm.write_register.i32(metadata !"v55", i32 0)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.write_register.i64(metadata !"v55", i32 0)
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_write_register_a55_57() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_a55_57(
+; CHECK-SAME: ) #[[ATTR3]] {
+; CHECK-NEXT:    call void @llvm.write_register.i96(metadata !"a[55:57]", i96 
0)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.write_register.i64(metadata !"a[55:57]", i96 0)
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_read_register_a55(ptr addrspace(1) 
%ptr) {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a55(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:    [[REG:%.*]] = call i32 @llvm.read_register.i32(metadata 
!"a55")
+; CHECK-NEXT:    store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4
+; CHECK-NEXT:    ret void
+;
+  %reg = call i32 @llvm.read_register.i64(metadata !"a55")
+  store i32 %reg, ptr addrspace(1) %ptr
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(ptr 
addrspace(1) %ptr) {
+; CHECK-LABEL: define amdgpu_kernel void 
@kernel_uses_read_volatile_register_a55(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:    [[REG:%.*]] = call i32 
@llvm.read_volatile_register.i32(metadata !"a55")
+; CHECK-NEXT:    store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4
+; CHECK-NEXT:    ret void
+;
+  %reg = call i32 @llvm.read_volatile_register.i64(metadata !"a55")
+  store i32 %reg, ptr addrspace(1) %ptr
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_read_register_a56_59(ptr addrspace(1) 
%ptr) {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a56_59(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:    [[REG:%.*]] = call i128 @llvm.read_register.i128(metadata 
!"a[56:59]")
+; CHECK-NEXT:    store i128 [[REG]], ptr addrspace(1) [[PTR]], align 8
+; CHECK-NEXT:    ret void
+;
+  %reg = call i128 @llvm.read_register.i64(metadata !"a[56:59]")
+  store i128 %reg, ptr addrspace(1) %ptr
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256() {
+; CHECK-LABEL: define amdgpu_kernel void 
@kernel_uses_write_register_out_of_bounds_a256(
+; CHECK-SAME: ) #[[ATTR3]] {
+; CHECK-NEXT:    call void @llvm.write_register.i32(metadata !"a256", i32 0)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.write_register.i64(metadata !"a256", i32 0)
+  ret void
+}
+
 attributes #0 = { "amdgpu-agpr-alloc"="0" }
 ;.
 ; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" 
"target-cpu"="gfx90a" "uniform-work-group-size"="false" }
 ; CHECK: attributes #[[ATTR1]] = { "target-cpu"="gfx90a" 
"uniform-work-group-size"="false" }
 ; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nosync 
nounwind willreturn memory(none) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind 
speculatable willreturn memory(none) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind 
willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="0" }
+; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" 
"amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" 
"amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" 
"amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" 
"amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" 
"amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" 
"amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" 
"amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" 
"uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR4]] = { "amdgpu-agpr-alloc"="0" 
"amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" 
"amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" 
"amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" 
"amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" 
"amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" 
"amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" 
"amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" 
"uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind 
speculatable willreturn memory(none) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nounwind 
willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind 
willreturn memory(read) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR8:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR9:[0-9]+]] = { nocallback nounwind 
"target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR10]] = { "amdgpu-agpr-alloc"="0" }
 ;.

_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to