https://github.com/rovka created https://github.com/llvm/llvm-project/pull/130030
This represents a hardware mode supported only for wave32 compute shaders. When enabled, we set the `.dynamic_vgpr_en` field of `.compute_registers` to true in the PAL metadata. From b2a7bdc3954d2bf72e99d730ce00159c2550f563 Mon Sep 17 00:00:00 2001 From: Diana Picus <diana-magda.pi...@amd.com> Date: Mon, 23 Oct 2023 10:36:31 +0200 Subject: [PATCH] [AMDGPU] Add SubtargetFeature for dynamic VGPR mode This represents a hardware mode supported only for wave32 compute shaders. When enabled, we set the `.dynamic_vgpr_en` field of `.compute_registers` to true in the PAL metadata. --- llvm/docs/AMDGPUUsage.rst | 6 ++++++ llvm/lib/Target/AMDGPU/AMDGPU.td | 6 ++++++ llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 3 +++ llvm/lib/Target/AMDGPU/GCNSubtarget.h | 3 +++ llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll | 13 ++++++++----- 5 files changed, 26 insertions(+), 5 deletions(-) diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index def6addd595e8..59cc08a59ed7c 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -758,6 +758,12 @@ For example: enabled will execute correctly but may be less performant than code generated for XNACK replay disabled. + + dynamic-vgpr TODO Represents the "Dynamic VGPR" hardware mode, introduced in GFX12. + Waves launched in this mode may allocate or deallocate the VGPRs + using dedicated instructions, but may not send the DEALLOC_VGPRS + message. + =============== ============================ ================================================== .. 
_amdgpu-target-id: diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index effc8d2ed6b49..31a98ee132bf6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1239,6 +1239,12 @@ def FeatureXF32Insts : SubtargetFeature<"xf32-insts", "v_mfma_f32_16x16x8_xf32 and v_mfma_f32_32x32x4_xf32" >; +def FeatureDynamicVGPR : SubtargetFeature <"dynamic-vgpr", + "DynamicVGPR", + "true", + "Enable dynamic VGPR mode" +>; + // Dummy feature used to disable assembler instructions. def FeatureDisable : SubtargetFeature<"", "FeatureDisable","true", diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 31e0bd8d652bc..13e61756e3036 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -1414,6 +1414,9 @@ static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD, MD->setHwStage(CC, ".trap_present", (bool)CurrentProgramInfo.TrapHandlerEnable); MD->setHwStage(CC, ".excp_en", CurrentProgramInfo.EXCPEnable); + + if (ST.isDynamicVGPREnabled()) + MD->setComputeRegisters(".dynamic_vgpr_en", true); } MD->setHwStage(CC, ".lds_size", diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 6664a70572ded..1254cbad83b60 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -190,6 +190,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, /// indicates a lack of S_CLAUSE support. unsigned MaxHardClauseLength = 0; bool SupportsSRAMECC = false; + bool DynamicVGPR = false; // This should not be used directly. 'TargetID' tracks the dynamic settings // for SRAMECC. 
@@ -1647,6 +1648,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, return true; } + bool isDynamicVGPREnabled() const { return DynamicVGPR; } + bool requiresDisjointEarlyClobberAndUndef() const override { // AMDGPU doesn't care if early-clobber and undef operands are allocated // to the same register. diff --git a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll index 7536e83a9da6b..fa22089978c2e 100644 --- a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll +++ b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll @@ -1,4 +1,6 @@ -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 <%s | FileCheck %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 <%s | FileCheck %s --check-prefixes=CHECK,GFX11 +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 <%s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 -mattr=+dynamic-vgpr <%s | FileCheck %s --check-prefixes=CHECK,DVGPR ; CHECK-LABEL: {{^}}_amdgpu_cs_main: ; CHECK: ; TotalNumSgprs: 4 @@ -8,6 +10,7 @@ ; CHECK-NEXT: amdpal.pipelines: ; CHECK-NEXT: - .api: Vulkan ; CHECK-NEXT: .compute_registers: +; DVGPR-NEXT: .dynamic_vgpr_en: true ; CHECK-NEXT: .tg_size_en: true ; CHECK-NEXT: .tgid_x_en: false ; CHECK-NEXT: .tgid_y_en: false @@ -57,7 +60,7 @@ ; CHECK-NEXT: .entry_point_symbol: _amdgpu_cs_main ; CHECK-NEXT: .excp_en: 0 ; CHECK-NEXT: .float_mode: 0xc0 -; CHECK-NEXT: .ieee_mode: false +; GFX11-NEXT: .ieee_mode: false ; CHECK-NEXT: .image_op: false ; CHECK-NEXT: .lds_size: 0 ; CHECK-NEXT: .mem_ordered: true @@ -112,7 +115,7 @@ ; CHECK-NEXT: .debug_mode: false ; CHECK-NEXT: .entry_point: _amdgpu_gs ; CHECK-NEXT: .entry_point_symbol: gs_shader -; CHECK-NEXT: .ieee_mode: false +; GFX11-NEXT: .ieee_mode: false ; CHECK-NEXT: .lds_size: 0x200 ; CHECK-NEXT: .mem_ordered: true ; CHECK-NEXT: .scratch_en: false @@ -124,7 +127,7 @@ ; CHECK-NEXT: .debug_mode: false ; CHECK-NEXT: .entry_point: _amdgpu_hs ; CHECK-NEXT: .entry_point_symbol: hs_shader -; 
CHECK-NEXT: .ieee_mode: false +; GFX11-NEXT: .ieee_mode: false ; CHECK-NEXT: .lds_size: 0x1000 ; CHECK-NEXT: .mem_ordered: true ; CHECK-NEXT: .scratch_en: false @@ -136,7 +139,7 @@ ; CHECK-NEXT: .debug_mode: false ; CHECK-NEXT: .entry_point: _amdgpu_ps ; CHECK-NEXT: .entry_point_symbol: ps_shader -; CHECK-NEXT: .ieee_mode: false +; GFX11-NEXT: .ieee_mode: false ; CHECK-NEXT: .lds_size: 0 ; CHECK-NEXT: .mem_ordered: true ; CHECK-NEXT: .scratch_en: false _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits