https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/161988
Fix the special case intrinsics that can directly reference a physical register. There's no reason to use this. >From e1402218d01f9269d181d69a289e80aa68d84c01 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <[email protected]> Date: Sun, 5 Oct 2025 10:17:18 +0900 Subject: [PATCH] AMDGPU: Account for read/write register intrinsics for AGPR usage Fix the special case intrinsics that can directly reference a physical register. There's no reason to use this. --- llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 17 +++- .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 15 +-- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 7 ++ .../AMDGPU/amdgpu-attributor-no-agpr.ll | 99 +++++++++++++++++-- 4 files changed, 120 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index 155b1a6a380dd..e8e39ff015eed 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -1313,10 +1313,21 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> { return false; } - // Some intrinsics may use AGPRs, but if we have a choice, we are not - // required to use AGPRs. - if (Callee->isIntrinsic()) + switch (Callee->getIntrinsicID()) { + case Intrinsic::write_register: + case Intrinsic::read_register: + case Intrinsic::read_volatile_register: { + const MDString *RegName = cast<MDString>( + cast<MetadataAsValue>(CB.getArgOperand(0))->getMetadata()); + auto [Kind, RegIdx, NumRegs] = + AMDGPU::parseAsmPhysRegName(RegName->getString()); + return Kind != 'a'; + } + default: + // Some intrinsics may use AGPRs, but if we have a choice, we are not + // required to use AGPRs. return true; + } // TODO: Handle callsite attributes const auto *CalleeInfo = A.getAAFor<AAAMDGPUNoAGPR>( diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 20fa1412a778e..8e57498c56f76 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1577,12 +1577,7 @@ static bool isValidRegPrefix(char C) { return C == 'v' || C == 's' || C == 'a'; } -std::tuple<char, unsigned, unsigned> -parseAsmConstraintPhysReg(StringRef Constraint) { - StringRef RegName = Constraint; - if (!RegName.consume_front("{") || !RegName.consume_back("}")) - return {}; - +std::tuple<char, unsigned, unsigned> parseAsmPhysRegName(StringRef RegName) { char Kind = RegName.front(); if (!isValidRegPrefix(Kind)) return {}; @@ -1609,6 +1604,14 @@ parseAsmConstraintPhysReg(StringRef Constraint) { return {}; } +std::tuple<char, unsigned, unsigned> +parseAsmConstraintPhysReg(StringRef Constraint) { + StringRef RegName = Constraint; + if (!RegName.consume_front("{") || !RegName.consume_back("}")) + return {}; + return parseAsmPhysRegName(RegName); +} + std::pair<unsigned, unsigned> getIntegerPairAttribute(const Function &F, StringRef Name, std::pair<unsigned, unsigned> Default, diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 2b9c063f42a5e..b656414f2b85c 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -1013,6 +1013,13 @@ bool isReadOnlySegment(const GlobalValue *GV); /// target triple \p TT, false otherwise. bool shouldEmitConstantsToTextSection(const Triple &TT); +/// Returns a valid charcode or 0 in the first entry if this is a valid physical +/// register name. Followed by the start register number, and the register +/// width. Does not validate the number of registers exists in the class. Unlike +/// parseAsmConstraintPhysReg, this does not expect the name to be wrapped in +/// "{}". +std::tuple<char, unsigned, unsigned> parseAsmPhysRegName(StringRef TupleString); + /// Returns a valid charcode or 0 in the first entry if this is a valid physical /// register constraint. Followed by the start register number, and the register /// width. Does not validate the number of registers exists in the class. diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll index f19d563067eb2..7fcd07486d54d 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll @@ -168,7 +168,7 @@ declare void @unknown() define amdgpu_kernel void @kernel_calls_extern() { ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern( -; CHECK-SAME: ) #[[ATTR1]] { +; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: call void @use_most() ; CHECK-NEXT: ret void @@ -180,8 +180,8 @@ define amdgpu_kernel void @kernel_calls_extern() { define amdgpu_kernel void @kernel_calls_extern_marked_callsite() { ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite( -; CHECK-SAME: ) #[[ATTR1]] { -; CHECK-NEXT: call void @unknown() #[[ATTR5:[0-9]+]] +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: call void @unknown() #[[ATTR10:[0-9]+]] ; CHECK-NEXT: call void @use_most() ; CHECK-NEXT: ret void ; @@ -205,7 +205,7 @@ define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) { define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) { ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect_marked_callsite( ; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR5]] +; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR10]] ; CHECK-NEXT: call void @use_most() ; CHECK-NEXT: ret void ; @@ -216,7 +216,7 @@ define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) define amdgpu_kernel void @kernel_transitively_uses_agpr_asm() { ; CHECK-LABEL: define amdgpu_kernel void @kernel_transitively_uses_agpr_asm( -; CHECK-SAME: ) #[[ATTR1]] { +; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: call void @func_uses_asm_physreg_agpr() ; CHECK-NEXT: call void @use_most() ; CHECK-NEXT: ret void @@ -260,7 +260,7 @@ define amdgpu_kernel void @kernel_calls_empty() { define amdgpu_kernel void @kernel_calls_non_agpr_and_agpr() { ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_non_agpr_and_agpr( -; CHECK-SAME: ) #[[ATTR1]] { +; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: call void @empty() ; CHECK-NEXT: call void @func_uses_asm_physreg_agpr() ; CHECK-NEXT: call void @use_most() @@ -616,12 +616,93 @@ define amdgpu_kernel void @physreg_def_a32___def_vreg_a512_use_vreg_a256() { ret void } +define amdgpu_kernel void @kernel_uses_write_register_a55() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_a55( +; CHECK-SAME: ) #[[ATTR3:[0-9]+]] { +; CHECK-NEXT: call void @llvm.write_register.i32(metadata !"a55", i32 0) +; CHECK-NEXT: ret void +; + call void @llvm.write_register.i64(metadata !"a55", i32 0) + ret void +} + +define amdgpu_kernel void @kernel_uses_write_register_v55() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_v55( +; CHECK-SAME: ) #[[ATTR4:[0-9]+]] { +; CHECK-NEXT: call void @llvm.write_register.i32(metadata !"v55", i32 0) +; CHECK-NEXT: ret void +; + call void @llvm.write_register.i64(metadata !"v55", i32 0) + ret void +} + +define amdgpu_kernel void @kernel_uses_write_register_a55_57() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_a55_57( +; CHECK-SAME: ) #[[ATTR3]] { +; CHECK-NEXT: call void @llvm.write_register.i96(metadata !"a[55:57]", i96 0) +; CHECK-NEXT: ret void +; + call void @llvm.write_register.i64(metadata !"a[55:57]", i96 0) + ret void +} + +define amdgpu_kernel void @kernel_uses_read_register_a55(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a55( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[REG:%.*]] = call i32 @llvm.read_register.i32(metadata !"a55") +; CHECK-NEXT: store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4 +; CHECK-NEXT: ret void +; + %reg = call i32 @llvm.read_register.i64(metadata !"a55") + store i32 %reg, ptr addrspace(1) %ptr + ret void +} + +define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_volatile_register_a55( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[REG:%.*]] = call i32 @llvm.read_volatile_register.i32(metadata !"a55") +; CHECK-NEXT: store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4 +; CHECK-NEXT: ret void +; + %reg = call i32 @llvm.read_volatile_register.i64(metadata !"a55") + store i32 %reg, ptr addrspace(1) %ptr + ret void +} + +define amdgpu_kernel void @kernel_uses_read_register_a56_59(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a56_59( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[REG:%.*]] = call i128 @llvm.read_register.i128(metadata !"a[56:59]") +; CHECK-NEXT: store i128 [[REG]], ptr addrspace(1) [[PTR]], align 8 +; CHECK-NEXT: ret void +; + %reg = call i128 @llvm.read_register.i64(metadata !"a[56:59]") + store i128 %reg, ptr addrspace(1) %ptr + ret void +} + +define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256( +; CHECK-SAME: ) #[[ATTR3]] { +; CHECK-NEXT: call void @llvm.write_register.i32(metadata !"a256", i32 0) +; CHECK-NEXT: ret void +; + call void @llvm.write_register.i64(metadata !"a256", i32 0) + ret void +} + attributes #0 = { "amdgpu-agpr-alloc"="0" } ;. ; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR1]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" } -; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" } -; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" } -; CHECK: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="0" } +; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR4]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(read) "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR8:[0-9]+]] = { nounwind "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR9:[0-9]+]] = { nocallback nounwind "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR10]] = { "amdgpu-agpr-alloc"="0" } ;. _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
