https://github.com/mbrkusanin updated https://github.com/llvm/llvm-project/pull/182107
From 9b7f8e40c4165ceaf65aef5e0542693c35591289 Mon Sep 17 00:00:00 2001 From: Mirko Brkusanin <[email protected]> Date: Wed, 18 Feb 2026 20:18:41 +0100 Subject: [PATCH 1/5] [AMDGPU] Remove DX10_CLAMP and IEEE bits from gfx1170 --- llvm/docs/AMDGPUUsage.rst | 14 ++- .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 10 +- .../Disassembler/AMDGPUDisassembler.cpp | 8 +- .../AMDGPU/Disassembler/AMDGPUDisassembler.h | 1 + llvm/lib/Target/AMDGPU/GCNSubtarget.h | 6 +- .../MCTargetDesc/AMDGPUMCKernelDescriptor.cpp | 2 +- .../MCTargetDesc/AMDGPUTargetStreamer.cpp | 2 +- .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 4 + llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 1 + .../AMDGPU/Utils/AMDKernelCodeTUtils.cpp | 8 +- .../AMDGPU/amdpal-msgpack-dx10-clamp-on.ll | 100 ++++++++++++++++++ .../AMDGPU/amdpal-msgpack-dx10-clamp.ll | 19 +++- .../CodeGen/AMDGPU/amdpal-msgpack-ieee.ll | 8 ++ llvm/test/MC/AMDGPU/hsa-diag-v4.s | 8 +- 14 files changed, 168 insertions(+), 23 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/amdpal-msgpack-dx10-clamp-on.ll diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 50a1fba755ba1..23a8581b40522 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -1992,11 +1992,11 @@ The AMDGPU backend supports the following LLVM IR attributes. "amdgpu-flat-work-group-size" value, the implied occupancy bounds by the workgroup size takes precedence. - "amdgpu-ieee" true/false. GFX6-GFX11 Only + "amdgpu-ieee" true/false. GFX6-GFX11 (Except GFX1170) Only Specify whether the function expects the IEEE field of the mode register to be set on entry. Overrides the default for the calling convention. - "amdgpu-dx10-clamp" true/false. GFX6-GFX11 Only + "amdgpu-dx10-clamp" true/false. GFX6-GFX11 (Except GFX1170) Only Specify whether the function expects the DX10_CLAMP field of the mode register to be set on entry. Overrides the default for the calling convention. @@ -5776,7 +5776,7 @@ The fields used by CP for code objects before V3 also match those specified in CP is responsible for filling in ``COMPUTE_PGM_RSRC1.PRIV``. - 21 1 bit ENABLE_DX10_CLAMP GFX9-GFX11 + 21 1 bit ENABLE_DX10_CLAMP GFX9-GFX11 (except GFX1170) Wavefront starts execution with DX10 clamp mode enabled. Used by the vector @@ -5788,6 +5788,8 @@ The fields used by CP for code objects before V3 also match those specified in Used by CP to set up ``COMPUTE_PGM_RSRC1.DX10_CLAMP``. + GFX1170 + Reserved. Must be 0. WG_RR_EN GFX12 If 1, wavefronts are scheduled in a round-robin fashion with @@ -21518,9 +21520,11 @@ terminated by an ``.end_amdhsa_kernel`` directive. Possible values are defined in :ref:`amdgpu-amdhsa-floating-point-denorm-mode-enumeration-values-table`. ``.amdhsa_dx10_clamp`` 1 GFX6-GFX11 Controls ENABLE_DX10_CLAMP in - :ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx12-table`. + (except :ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx12-table`. + GFX1170) ``.amdhsa_ieee_mode`` 1 GFX6-GFX11 Controls ENABLE_IEEE_MODE in - :ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx12-table`. + (except :ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx12-table`. + GFX1170) ``.amdhsa_round_robin_scheduling`` 0 GFX12 Controls ENABLE_WG_RR_EN in :ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx12-table`. ``.amdhsa_fp16_overflow`` 0 GFX9-GFX12 Controls FP16_OVFL in diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 01cc4ff4ae854..feb858e1b1e1d 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -6236,14 +6236,16 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal, ValRange); } else if (ID == ".amdhsa_dx10_clamp") { - if (IVersion.Major >= 12) - return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange); + if ((IVersion.Major == 11 && IVersion.Minor >= 7) || IVersion.Major >= 12) + return Error(IDRange.Start, "directive unsupported on gfx1170+", + IDRange); PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal, ValRange); } else if (ID == ".amdhsa_ieee_mode") { - if (IVersion.Major >= 12) - return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange); + if ((IVersion.Major == 11 && IVersion.Minor >= 7) || IVersion.Major >= 12) + return Error(IDRange.Start, "directive unsupported on gfx1170+", + IDRange); PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal, ValRange); diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 9c7b4ec369db0..72eac4848cd89 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -2257,6 +2257,10 @@ bool AMDGPUDisassembler::isGFX11Plus() const { bool AMDGPUDisassembler::isGFX1170() const { return AMDGPU::isGFX1170(STI); } +bool AMDGPUDisassembler::isGFX1170Plus() const { + return AMDGPU::isGFX1170Plus(STI); +} + bool AMDGPUDisassembler::isGFX12() const { return STI.hasFeature(AMDGPU::FeatureGFX12); } @@ -2411,13 +2415,13 @@ Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1( CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV); - if (!isGFX12Plus()) + if (!isGFX1170Plus()) PRINT_DIRECTIVE(".amdhsa_dx10_clamp", COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP); CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE); - if (!isGFX12Plus()) + if (!isGFX1170Plus()) PRINT_DIRECTIVE(".amdhsa_ieee_mode", COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE); diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index b01eb8dd59fad..3cec0e86290e5 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -180,6 +180,7 @@ class AMDGPUDisassembler : public MCDisassembler { bool isGFX11() const; bool isGFX1170() const; bool isGFX11Plus() const; + bool isGFX1170Plus() const; bool isGFX12() const; bool isGFX12Plus() const; bool isGFX1250() const; diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 5dac0c5dd9bd7..473994b9bcabb 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -400,6 +400,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, return getGeneration() == GFX11 && hasWMMA128bInsts(); } + bool isGFX1170Plus() const { return getGeneration() >= GFX12 || isGFX1170(); } + bool hasMad64_32() const { return getGeneration() >= SEA_ISLANDS; } bool hasAtomicFaddInsts() const { @@ -686,10 +688,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool hasSplitBarriers() const { return getGeneration() >= GFX12; } // \returns true if the target has DX10_CLAMP kernel descriptor mode bit - bool hasDX10ClampMode() const { return getGeneration() < GFX12; } + bool hasDX10ClampMode() const { return !isGFX1170Plus(); } // \returns true if the target has IEEE kernel descriptor mode bit - bool hasIEEEMode() const { return getGeneration() < GFX12; } + bool hasIEEEMode() const { return !isGFX1170Plus(); } // \returns true if the target has WG_RR_MODE kernel descriptor mode bit bool hasRrWGMode() const { return getGeneration() >= GFX12; } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.cpp index b467dbb2cd519..b35eaf4d8e2ff 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.cpp @@ -40,7 +40,7 @@ MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCConstantExpr::create(amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE, Ctx), amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT, amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Ctx); - if (Version.Major < 12) { + if (Version.Major < 11 || (Version.Major == 11 && Version.Minor < 7)) { MCKernelDescriptor::bits_set( KD.compute_pgm_rsrc1, OneMCExpr, amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT, diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 86c5d1c3a2532..b8639bbaf60e7 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -559,7 +559,7 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT, amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ".amdhsa_float_denorm_mode_16_64"); - if (IVersion.Major < 12) { + if (IVersion.Major < 11 || (IVersion.Major == 11 && IVersion.Minor < 7)) { PrintField(KD.compute_pgm_rsrc1, amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT, amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 8ad424a255fc1..802bf7dc25b03 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -2606,6 +2606,10 @@ bool isGFX11Plus(const MCSubtargetInfo &STI) { return isGFX11(STI) || isGFX12Plus(STI); } +bool isGFX1170Plus(const MCSubtargetInfo &STI) { + return isGFX1170(STI) || isGFX12Plus(STI); +} + bool isGFX12(const MCSubtargetInfo &STI) { return STI.getFeatureBits()[AMDGPU::FeatureGFX12]; } diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index fa24383c90fa6..e2cf5e6ee2b44 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -1707,6 +1707,7 @@ bool isGFX10Before1030(const MCSubtargetInfo &STI); bool isGFX11(const MCSubtargetInfo &STI); bool isGFX1170(const MCSubtargetInfo &STI); bool isGFX11Plus(const MCSubtargetInfo &STI); +bool isGFX1170Plus(const MCSubtargetInfo &STI); bool isGFX12(const MCSubtargetInfo &STI); bool isGFX12Plus(const MCSubtargetInfo &STI); bool isGFX1250(const MCSubtargetInfo &STI); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp index 24251e12d57dc..1a0b0b0c48847 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp @@ -383,13 +383,13 @@ void AMDGPUMCKernelCodeT::validate(const MCSubtargetInfo *STI, MCContext &Ctx) { if (!compute_pgm_resource1_registers->evaluateAsAbsolute(Value)) return; - if (G_00B848_DX10_CLAMP(Value) && AMDGPU::isGFX12Plus(*STI)) { - Ctx.reportError({}, "enable_dx10_clamp=1 is not allowed on GFX12+"); + if (G_00B848_DX10_CLAMP(Value) && AMDGPU::isGFX1170Plus(*STI)) { + Ctx.reportError({}, "enable_dx10_clamp=1 is not allowed on GFX1170+"); return; } - if (G_00B848_IEEE_MODE(Value) && AMDGPU::isGFX12Plus(*STI)) { - Ctx.reportError({}, "enable_ieee_mode=1 is not allowed on GFX12+"); + if (G_00B848_IEEE_MODE(Value) && AMDGPU::isGFX1170Plus(*STI)) { + Ctx.reportError({}, "enable_ieee_mode=1 is not allowed on GFX1170+"); return; } diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-dx10-clamp-on.ll b/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-dx10-clamp-on.ll new file mode 100644 index 0000000000000..5aa4a127ada39 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-dx10-clamp-on.ll @@ -0,0 +1,100 @@ +; RUN: llc -mtriple=amdgcn--amdpal < %s | FileCheck -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga < %s | FileCheck -check-prefix=VI %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 -enable-var-scope %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1170 < %s | FileCheck -check-prefix=GFX1170 -enable-var-scope %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 -enable-var-scope %s + +; amdpal compute shader: check for 0x2e12 (COMPUTE_PGM_RSRC1) in pal metadata +; SI-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0x2f0000{{$}} +; VI-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0x2f02c0{{$}} +; GFX9-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0x2f0000{{$}} +; GFX1170-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xe00f0000{{$}} +; GFX12-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xe00f0000{{$}} +define amdgpu_cs half @cs_amdpal(half %arg0) #0 { + %add = fadd half %arg0, 1.0 + ret half %add +} + +; amdpal evaluation shader: check for 0x2cca (SPI_SHADER_PGM_RSRC1_ES) in pal metadata +; SI-DAG: '0x2cca (SPI_SHADER_PGM_RSRC1_ES)': 0x2f0000{{$}} +; VI-DAG: '0x2cca (SPI_SHADER_PGM_RSRC1_ES)': 0x2f02c0{{$}} +; GFX9-DAG: '0x2cca (SPI_SHADER_PGM_RSRC1_ES)': 0x2f0000{{$}} +; GFX1170-DAG: '0x2cca (SPI_SHADER_PGM_RSRC1_ES)': 0xf0000{{$}} +; GFX12-DAG: '0x2cca (SPI_SHADER_PGM_RSRC1_ES)': 0xf0000{{$}} +define amdgpu_es half @es_amdpal(half %arg0) #0 { + %add = fadd half %arg0, 1.0 + ret half %add +} + +; amdpal geometry shader: check for 0x2c8a (SPI_SHADER_PGM_RSRC1_GS) in pal metadata +; SI-DAG: '0x2c8a (SPI_SHADER_PGM_RSRC1_GS)': 0x2f0000{{$}} +; VI-DAG: '0x2c8a (SPI_SHADER_PGM_RSRC1_GS)': 0x2f02c0{{$}} +; GFX9-DAG: '0x2c8a (SPI_SHADER_PGM_RSRC1_GS)': 0x2f0000{{$}} +; GFX1170-DAG: '0x2c8a (SPI_SHADER_PGM_RSRC1_GS)': 0xa0f0000{{$}} +; GFX12-DAG: '0x2c8a (SPI_SHADER_PGM_RSRC1_GS)': 0xa0f0000{{$}} +define amdgpu_gs half @gs_amdpal(half %arg0) #0 { + %add = fadd half %arg0, 1.0 + ret half %add +} + +; amdpal hull shader: check for 0x2d0a (SPI_SHADER_PGM_RSRC1_HS) in pal metadata +; SI-DAG: '0x2d0a (SPI_SHADER_PGM_RSRC1_HS)': 0x2f0000{{$}} +; VI-DAG: '0x2d0a (SPI_SHADER_PGM_RSRC1_HS)': 0x2f02c0{{$}} +; GFX9-DAG: '0x2d0a (SPI_SHADER_PGM_RSRC1_HS)': 0x2f0000{{$}} +; GFX1170-DAG: '0x2d0a (SPI_SHADER_PGM_RSRC1_HS)': 0x50f0000{{$}} +; GFX12-DAG: '0x2d0a (SPI_SHADER_PGM_RSRC1_HS)': 0x50f0000{{$}} +define amdgpu_hs half @hs_amdpal(half %arg0) #0 { + %add = fadd half %arg0, 1.0 + ret half %add +} + +; amdpal load shader: check for 0x2d4a (SPI_SHADER_PGM_RSRC1_LS) in pal metadata +; SI-DAG: '0x2d4a (SPI_SHADER_PGM_RSRC1_LS)': 0x2f0000{{$}} +; VI-DAG: '0x2d4a (SPI_SHADER_PGM_RSRC1_LS)': 0x2f02c0{{$}} +; GFX9-DAG: '0x2d4a (SPI_SHADER_PGM_RSRC1_LS)': 0x2f0000{{$}} +; GFX1170-DAG: '0x2d4a (SPI_SHADER_PGM_RSRC1_LS)': 0xf0000{{$}} +; GFX12-DAG: '0x2d4a (SPI_SHADER_PGM_RSRC1_LS)': 0xf0000{{$}} +define amdgpu_ls half @ls_amdpal(half %arg0) #0 { + %add = fadd half %arg0, 1.0 + ret half %add +} + +; amdpal pixel shader: check for 0x2c0a (SPI_SHADER_PGM_RSRC1_PS) in pal metadata +; SI-DAG: '0x2c0a (SPI_SHADER_PGM_RSRC1_PS)': 0x2f0000{{$}} +; VI-DAG: '0x2c0a (SPI_SHADER_PGM_RSRC1_PS)': 0x2f02c0{{$}} +; GFX9-DAG: '0x2c0a (SPI_SHADER_PGM_RSRC1_PS)': 0x2f0000{{$}} +; GFX1170-DAG: '0x2c0a (SPI_SHADER_PGM_RSRC1_PS)': 0x20f0000{{$}} +; GFX12-DAG: '0x2c0a (SPI_SHADER_PGM_RSRC1_PS)': 0x20f0000{{$}} +define amdgpu_ps half @ps_amdpal(half %arg0) #0 { + %add = fadd half %arg0, 1.0 + ret half %add +} + +; amdpal vertex shader: check for 0x2c4a (SPI_SHADER_PGM_RSRC1_VS) in pal metadata +; SI-DAG: '0x2c4a (SPI_SHADER_PGM_RSRC1_VS)': 0x2f0000{{$}} +; VI-DAG: '0x2c4a (SPI_SHADER_PGM_RSRC1_VS)': 0x2f02c0{{$}} +; GFX9-DAG: '0x2c4a (SPI_SHADER_PGM_RSRC1_VS)': 0x2f0000{{$}} +; GFX1170-DAG: '0x2c4a (SPI_SHADER_PGM_RSRC1_VS)': 0x80f0000{{$}} +; GFX12-DAG: '0x2c4a (SPI_SHADER_PGM_RSRC1_VS)': 0x80f0000{{$}} +define amdgpu_vs half @vs_amdpal(half %arg0) #0 { + %add = fadd half %arg0, 1.0 + ret half %add +} + +attributes #0 = { "amdgpu-dx10-clamp"="true" } + +; amdgpu.pal.metadata.msgpack represents this: +; +; .amdgpu_pal_metadata +; --- +; amdpal.pipelines: +; - .internal_pipeline_hash: +; - 0x123456789abcdef0 +; - 0xfedcba9876543210 +; .registers: +; '0x2c0b (SPI_SHADER_PGM_RSRC2_PS)': 0x42000000 +; ... +; .end_amdgpu_pal_metadata + +!amdgpu.pal.metadata.msgpack = !{!0} +!0 = !{!"\81\b0\61\6d\64\70\61\6c\2e\70\69\70\65\6c\69\6e\65\73\91\82\b7\2e\69\6e\74\65\72\6e\61\6c\5f\70\69\70\65\6c\69\6e\65\5f\68\61\73\68\92\cf\12\34\56\78\9a\bc\de\f0\cf\fe\dc\ba\98\76\54\32\10\aa\2e\72\65\67\69\73\74\65\72\73\81\cd\2c\0b\ce\42\00\00\00"}; diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-dx10-clamp.ll b/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-dx10-clamp.ll index cc30461901c84..24e52594176b3 100644 --- a/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-dx10-clamp.ll +++ b/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-dx10-clamp.ll @@ -1,11 +1,15 @@ ; RUN: llc -mtriple=amdgcn--amdpal < %s | FileCheck -check-prefix=SI %s ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga < %s | FileCheck -check-prefix=VI %s ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 -enable-var-scope %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1170 < %s | FileCheck -check-prefix=GFX1170 -enable-var-scope %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 -enable-var-scope %s ; amdpal compute shader: check for 0x2e12 (COMPUTE_PGM_RSRC1) in pal metadata ; SI-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xf0000{{$}} ; VI-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xf02c0{{$}} ; GFX9-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xf0000{{$}} +; GFX1170-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xe00f0000{{$}} +; GFX12-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xe00f0000{{$}} define amdgpu_cs half @cs_amdpal(half %arg0) #0 { %add = fadd half %arg0, 1.0 ret half %add @@ -15,6 +19,8 @@ define amdgpu_cs half @cs_amdpal(half %arg0) #0 { ; SI-DAG: '0x2cca (SPI_SHADER_PGM_RSRC1_ES)': 0xf0000{{$}} ; VI-DAG: '0x2cca (SPI_SHADER_PGM_RSRC1_ES)': 0xf02c0{{$}} ; GFX9-DAG: '0x2cca (SPI_SHADER_PGM_RSRC1_ES)': 0xf0000{{$}} +; GFX1170-DAG: '0x2cca (SPI_SHADER_PGM_RSRC1_ES)': 0xf0000{{$}} +; GFX12-DAG: '0x2cca (SPI_SHADER_PGM_RSRC1_ES)': 0xf0000{{$}} define amdgpu_es half @es_amdpal(half %arg0) #0 { %add = fadd half %arg0, 1.0 ret half %add @@ -24,6 +30,8 @@ define amdgpu_es half @es_amdpal(half %arg0) #0 { ; SI-DAG: '0x2c8a (SPI_SHADER_PGM_RSRC1_GS)': 0xf0000{{$}} ; VI-DAG: '0x2c8a (SPI_SHADER_PGM_RSRC1_GS)': 0xf02c0{{$}} ; GFX9-DAG: '0x2c8a (SPI_SHADER_PGM_RSRC1_GS)': 0xf0000{{$}} +; GFX1170-DAG: '0x2c8a (SPI_SHADER_PGM_RSRC1_GS)': 0xa0f0000{{$}} +; GFX12-DAG: '0x2c8a (SPI_SHADER_PGM_RSRC1_GS)': 0xa0f0000{{$}} define amdgpu_gs half @gs_amdpal(half %arg0) #0 { %add = fadd half %arg0, 1.0 ret half %add @@ -33,6 +41,8 @@ define amdgpu_gs half @gs_amdpal(half %arg0) #0 { ; SI-DAG: '0x2d0a (SPI_SHADER_PGM_RSRC1_HS)': 0xf0000{{$}} ; VI-DAG: '0x2d0a (SPI_SHADER_PGM_RSRC1_HS)': 0xf02c0{{$}} ; GFX9-DAG: '0x2d0a (SPI_SHADER_PGM_RSRC1_HS)': 0xf0000{{$}} +; GFX1170-DAG: '0x2d0a (SPI_SHADER_PGM_RSRC1_HS)': 0x50f0000{{$}} +; GFX12-DAG: '0x2d0a (SPI_SHADER_PGM_RSRC1_HS)': 0x50f0000{{$}} define amdgpu_hs half @hs_amdpal(half %arg0) #0 { %add = fadd half %arg0, 1.0 ret half %add @@ -42,25 +52,30 @@ define amdgpu_hs half @hs_amdpal(half %arg0) #0 { ; SI-DAG: '0x2d4a (SPI_SHADER_PGM_RSRC1_LS)': 0xf0000{{$}} ; VI-DAG: '0x2d4a (SPI_SHADER_PGM_RSRC1_LS)': 0xf02c0{{$}} ; GFX9-DAG: '0x2d4a (SPI_SHADER_PGM_RSRC1_LS)': 0xf0000{{$}} +; GFX1170-DAG: '0x2d4a (SPI_SHADER_PGM_RSRC1_LS)': 0xf0000{{$}} +; GFX12-DAG: '0x2d4a (SPI_SHADER_PGM_RSRC1_LS)': 0xf0000{{$}} define amdgpu_ls half @ls_amdpal(half %arg0) #0 { %add = fadd half %arg0, 1.0 ret half %add } ; amdpal pixel shader: check for 0x2c0a (SPI_SHADER_PGM_RSRC1_PS) in pal metadata -; below. ; SI-DAG: '0x2c0a (SPI_SHADER_PGM_RSRC1_PS)': 0xf0000{{$}} ; VI-DAG: '0x2c0a (SPI_SHADER_PGM_RSRC1_PS)': 0xf02c0{{$}} ; GFX9-DAG: '0x2c0a (SPI_SHADER_PGM_RSRC1_PS)': 0xf0000{{$}} +; GFX1170-DAG: '0x2c0a (SPI_SHADER_PGM_RSRC1_PS)': 0x20f0000{{$}} +; GFX12-DAG: '0x2c0a (SPI_SHADER_PGM_RSRC1_PS)': 0x20f0000{{$}} define amdgpu_ps half @ps_amdpal(half %arg0) #0 { %add = fadd half %arg0, 1.0 ret half %add } -; amdpal vertex shader: check for 45352 (SPI_SHADER_PGM_RSRC1_VS) in pal metadata +; amdpal vertex shader: check for 0x2c4a (SPI_SHADER_PGM_RSRC1_VS) in pal metadata ; SI-DAG: '0x2c4a (SPI_SHADER_PGM_RSRC1_VS)': 0xf0000{{$}} ; VI-DAG: '0x2c4a (SPI_SHADER_PGM_RSRC1_VS)': 0xf02c0{{$}} ; GFX9-DAG: '0x2c4a (SPI_SHADER_PGM_RSRC1_VS)': 0xf0000{{$}} +; GFX1170-DAG: '0x2c4a (SPI_SHADER_PGM_RSRC1_VS)': 0x80f0000{{$}} +; GFX12-DAG: '0x2c4a (SPI_SHADER_PGM_RSRC1_VS)': 0x80f0000{{$}} define amdgpu_vs half @vs_amdpal(half %arg0) #0 { %add = fadd half %arg0, 1.0 ret half %add diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-ieee.ll b/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-ieee.ll index f8978dad5d60c..1dbee82cd407d 100644 --- a/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-ieee.ll +++ b/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-ieee.ll @@ -1,12 +1,14 @@ ; RUN: llc -mtriple=amdgcn--amdpal < %s | FileCheck -check-prefix=SI %s ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga < %s | FileCheck -check-prefix=VI %s ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 -enable-var-scope %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1170 < %s | FileCheck -check-prefix=GFX1170 -enable-var-scope %s ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 -enable-var-scope %s ; amdpal compute shader: check for 0x2e12 (COMPUTE_PGM_RSRC1) in pal metadata ; SI-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xaf0000{{$}} ; VI-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xaf02c0{{$}} ; GFX9-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xaf0000{{$}} +; GFX1170-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xe00f0000{{$}} ; GFX12-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xe00f0000{{$}} define amdgpu_cs half @cs_amdpal(half %arg0) #0 { %add = fadd half %arg0, 1.0 @@ -17,6 +19,7 @@ define amdgpu_cs half @cs_amdpal(half %arg0) #0 { ; SI-DAG: '0x2cca (SPI_SHADER_PGM_RSRC1_ES)': 0xaf0000{{$}} ; VI-DAG: '0x2cca (SPI_SHADER_PGM_RSRC1_ES)': 0xaf02c0{{$}} ; GFX9-DAG: '0x2cca (SPI_SHADER_PGM_RSRC1_ES)': 0xaf0000{{$}} +; GFX1170-DAG: '0x2cca (SPI_SHADER_PGM_RSRC1_ES)': 0xf0000{{$}} ; GFX12-DAG: '0x2cca (SPI_SHADER_PGM_RSRC1_ES)': 0xf0000{{$}} define amdgpu_es half @es_amdpal(half %arg0) #0 { %add = fadd half %arg0, 1.0 @@ -27,6 +30,7 @@ define amdgpu_es half @es_amdpal(half %arg0) #0 { ; SI-DAG: '0x2c8a (SPI_SHADER_PGM_RSRC1_GS)': 0xaf0000{{$}} ; VI-DAG: '0x2c8a (SPI_SHADER_PGM_RSRC1_GS)': 0xaf02c0{{$}} ; GFX9-DAG: '0x2c8a (SPI_SHADER_PGM_RSRC1_GS)': 0xaf0000{{$}} +; GFX1170-DAG: '0x2c8a (SPI_SHADER_PGM_RSRC1_GS)': 0xa0f0000{{$}} ; GFX12-DAG: '0x2c8a (SPI_SHADER_PGM_RSRC1_GS)': 0xa0f0000{{$}} define amdgpu_gs half @gs_amdpal(half %arg0) #0 { %add = fadd half %arg0, 1.0 @@ -37,6 +41,7 @@ define amdgpu_gs half @gs_amdpal(half %arg0) #0 { ; SI-DAG: '0x2d0a (SPI_SHADER_PGM_RSRC1_HS)': 0xaf0000{{$}} ; VI-DAG: '0x2d0a (SPI_SHADER_PGM_RSRC1_HS)': 0xaf02c0{{$}} ; GFX9-DAG: '0x2d0a (SPI_SHADER_PGM_RSRC1_HS)': 0xaf0000{{$}} +; GFX1170-DAG: '0x2d0a (SPI_SHADER_PGM_RSRC1_HS)': 0x50f0000{{$}} ; GFX12-DAG: '0x2d0a (SPI_SHADER_PGM_RSRC1_HS)': 0x50f0000{{$}} define amdgpu_hs half @hs_amdpal(half %arg0) #0 { %add = fadd half %arg0, 1.0 @@ -47,6 +52,7 @@ define amdgpu_hs half @hs_amdpal(half %arg0) #0 { ; SI-DAG: '0x2d4a (SPI_SHADER_PGM_RSRC1_LS)': 0xaf0000{{$}} ; VI-DAG: '0x2d4a (SPI_SHADER_PGM_RSRC1_LS)': 0xaf02c0{{$}} ; GFX9-DAG: '0x2d4a (SPI_SHADER_PGM_RSRC1_LS)': 0xaf0000{{$}} +; GFX1170-DAG: '0x2d4a (SPI_SHADER_PGM_RSRC1_LS)': 0xf0000{{$}} ; GFX12-DAG: '0x2d4a (SPI_SHADER_PGM_RSRC1_LS)': 0xf0000{{$}} define amdgpu_ls half @ls_amdpal(half %arg0) #0 { %add = fadd half %arg0, 1.0 @@ -58,6 +64,7 @@ define amdgpu_ls half @ls_amdpal(half %arg0) #0 { ; SI-DAG: '0x2c0a (SPI_SHADER_PGM_RSRC1_PS)': 0xaf0000{{$}} ; VI-DAG: '0x2c0a (SPI_SHADER_PGM_RSRC1_PS)': 0xaf02c0{{$}} ; GFX9-DAG: '0x2c0a (SPI_SHADER_PGM_RSRC1_PS)': 0xaf0000{{$}} +; GFX1170-DAG: '0x2c0a (SPI_SHADER_PGM_RSRC1_PS)': 0x20f0000{{$}} ; GFX12-DAG: '0x2c0a (SPI_SHADER_PGM_RSRC1_PS)': 0x20f0000{{$}} define amdgpu_ps half @ps_amdpal(half %arg0) #0 { %add = fadd half %arg0, 1.0 @@ -68,6 +75,7 @@ define amdgpu_ps half @ps_amdpal(half %arg0) #0 { ; SI-DAG: '0x2c4a (SPI_SHADER_PGM_RSRC1_VS)': 0xaf0000{{$}} ; VI-DAG: '0x2c4a (SPI_SHADER_PGM_RSRC1_VS)': 0xaf02c0{{$}} ; GFX9-DAG: '0x2c4a (SPI_SHADER_PGM_RSRC1_VS)': 0xaf0000{{$}} +; GFX1170-DAG: '0x2c4a (SPI_SHADER_PGM_RSRC1_VS)': 0x80f0000{{$}} ; GFX12-DAG: '0x2c4a (SPI_SHADER_PGM_RSRC1_VS)': 0x80f0000{{$}} define amdgpu_vs half @vs_amdpal(half %arg0) #0 { %add = fadd half %arg0, 1.0 diff --git a/llvm/test/MC/AMDGPU/hsa-diag-v4.s b/llvm/test/MC/AMDGPU/hsa-diag-v4.s index 2fb6a4d3e458e..cdc621ca00733 100644 --- a/llvm/test/MC/AMDGPU/hsa-diag-v4.s +++ b/llvm/test/MC/AMDGPU/hsa-diag-v4.s @@ -2,6 +2,7 @@ // RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack %s -filetype=null 2>&1 | FileCheck %s --check-prefixes=ALL,GCN,GFX10PLUS,GFX10,AMDHSA // RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd-amdhsa -mcpu=gfx1100 %s -filetype=null 2>&1 | FileCheck %s --check-prefixes=ALL,GCN,GFX10PLUS,GFX11,AMDHSA // RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd-amdhsa -mcpu=gfx1200 %s -filetype=null 2>&1 | FileCheck %s -DMCPU=gfx1200 --check-prefixes=ALL,GCN,GFX10PLUS,GFX12,AMDHSA +// RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd-amdhsa -mcpu=gfx1170 %s -filetype=null 2>&1 | FileCheck %s --check-prefixes=ALL,GCN,GFX10PLUS,GFX1170,AMDHSA // RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd- -mcpu=gfx810 -mattr=+xnack %s -filetype=null 2>&1 | FileCheck %s --check-prefixes=ALL,GCN,NONAMDHSA // RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack %s -filetype=null 2>&1 | FileCheck %s -DMCPU=gfx90a --check-prefixes=ALL,GFX90A,PREGFX10,NOWGP,AMDHSA // RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd-amdhsa -mcpu=gfx1250 %s -filetype=null 2>&1 | FileCheck %s -DMCPU=gfx1250 --check-prefixes=ALL,GCN,GFX10PLUS,GFX12,NOWGP,AMDHSA @@ -13,6 +14,7 @@ // GFX10: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810:xnack+ does not match the specified target id amdgcn-amd-amdhsa--gfx1010:xnack+ // GFX11: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810:xnack+ does not match the specified target id amdgcn-amd-amdhsa--gfx1100 // GFX12: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810:xnack+ does not match the specified target id amdgcn-amd-amdhsa--[[MCPU]] +// GFX1170: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810:xnack+ does not match the specified target id amdgcn-amd-amdhsa--gfx1170 // NONAMDHSA: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810:xnack+ does not match the specified target id amdgcn-amd-unknown--gfx810 .warning "test_target" .amdgcn_target "amdgcn-amd-amdhsa--gfx810:xnack+" @@ -290,7 +292,8 @@ .end_amdhsa_kernel // GCN-LABEL: warning: test_amdhsa_dx10_clamp_bit -// GFX12: error: directive unsupported on gfx12+ +// GFX1170: error: directive unsupported on gfx1170+ +// GFX12: error: directive unsupported on gfx1170+ .warning "test_amdhsa_dx10_clamp_bit" .amdhsa_kernel test_amdhsa_dx10_clamp_bit .amdhsa_next_free_vgpr 32 @@ -299,7 +302,8 @@ .end_amdhsa_kernel // GCN-LABEL: warning: test_amdhsa_ieee_mode_bit -// GFX12: error: directive unsupported on gfx12+ +// GFX1170: error: directive unsupported on gfx1170+ +// GFX12: error: directive unsupported on gfx1170+ .warning "test_amdhsa_ieee_mode_bit" .amdhsa_kernel test_amdhsa_ieee_mode_bit .amdhsa_next_free_vgpr 32 From 89be95b16cd9838298d6ad9daf9fc108318106a6 Mon Sep 17 00:00:00 2001 From: Mirko Brkusanin <[email protected]> Date: Thu, 19 Feb 2026 15:34:51 +0100 Subject: [PATCH 2/5] Add subtarget feature --- llvm/lib/Target/AMDGPU/AMDGPU.td | 24 ++++++++++++------- .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 4 ++-- .../Disassembler/AMDGPUDisassembler.cpp | 4 ++-- llvm/lib/Target/AMDGPU/GCNProcessors.td | 5 ++-- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 4 ++-- .../MCTargetDesc/AMDGPUMCKernelDescriptor.cpp | 2 +- .../MCTargetDesc/AMDGPUTargetStreamer.cpp | 2 +- .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 4 ++++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 1 + .../AMDGPU/Utils/AMDKernelCodeTUtils.cpp | 4 ++-- 10 files changed, 34 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 07fb32173c2a3..a9093228218c0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -160,6 +160,10 @@ defm RelaxedBufferOOBMode : AMDGPUSubtargetFeature<"relaxed-buffer-oob-mode", "cause an adjacent access to be treated as if it were also OOB" >; +defm DX10ClampAndIEEEMode : AMDGPUSubtargetFeature<"dx10-clamp-and-ieee-mode", + "Target has DX10_CLAMP and IEEE_MODE kernel descriptor bits" +>; + defm ApertureRegs : AMDGPUSubtargetFeature<"aperture-regs", "Has Memory Aperture Base and Size Registers", /*GenPredicate=*/0 @@ -1328,7 +1332,7 @@ def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS", FeatureGDS, FeatureGWS, FeatureDefaultComponentZero, FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts, FeatureVmemWriteVgprInOrder, FeatureCubeInsts, FeatureLerpInst, - FeatureSadInsts, FeatureCvtPkNormVOP2Insts + FeatureSadInsts, FeatureCvtPkNormVOP2Insts, FeatureDX10ClampAndIEEEMode ] >; @@ -1343,7 +1347,8 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS", FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts, FeatureVmemWriteVgprInOrder, FeatureCubeInsts, FeatureLerpInst, - FeatureSadInsts, FeatureQsadInsts, FeatureCvtPkNormVOP2Insts + FeatureSadInsts, FeatureQsadInsts, FeatureCvtPkNormVOP2Insts, + FeatureDX10ClampAndIEEEMode ] >; @@ -1362,7 +1367,7 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS", FeatureUnalignedBufferAccess, FeatureImageInsts, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder, FeatureCubeInsts, FeatureLerpInst, FeatureSadInsts, FeatureQsadInsts, - FeatureCvtPkNormVOP2Insts + FeatureCvtPkNormVOP2Insts, FeatureDX10ClampAndIEEEMode ] >; @@ -1384,7 +1389,7 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9", FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder, FeatureVMemToLDSLoad, FeatureCubeInsts, FeatureLerpInst, FeatureSadInsts, FeatureQsadInsts, FeatureCvtNormInsts, FeatureCvtPkNormVOP2Insts, - FeatureCvtPkNormVOP3Insts + FeatureCvtPkNormVOP3Insts, FeatureDX10ClampAndIEEEMode ] >; @@ -1411,7 +1416,7 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10", FeatureVmemWriteVgprInOrder, FeatureVMemToLDSLoad, FeatureCubeInsts, FeatureLerpInst, FeatureSadInsts, FeatureQsadInsts, FeatureCvtNormInsts, FeatureCvtPkNormVOP2Insts, - FeatureCvtPkNormVOP3Insts + FeatureCvtPkNormVOP3Insts, FeatureDX10ClampAndIEEEMode ] >; @@ -1849,7 +1854,8 @@ def FeatureISAVersion11_Generic: FeatureSet< FeatureRequiresCOV6, FeatureRequiredExportPriority, FeatureDot5Insts, - FeatureWMMA256bInsts])>; + FeatureWMMA256bInsts, + FeatureDX10ClampAndIEEEMode])>; def FeatureISAVersion11_0_Common : FeatureSet< !listconcat(FeatureISAVersion11_Common.Features, @@ -1858,7 +1864,8 @@ def FeatureISAVersion11_0_Common : FeatureSet< FeatureMADIntraFwdBug, FeaturePrivEnabledTrap2NopBug, FeatureDot5Insts, - FeatureWMMA256bInsts])>; + FeatureWMMA256bInsts, + FeatureDX10ClampAndIEEEMode])>; def FeatureISAVersion11_0_0 : FeatureSet< !listconcat(FeatureISAVersion11_0_Common.Features, @@ -1883,7 +1890,8 @@ def FeatureISAVersion11_5_Common : FeatureSet< FeatureDPPSrc1SGPR, FeatureRequiredExportPriority, FeatureDot5Insts, - FeatureWMMA256bInsts])>; + FeatureWMMA256bInsts, + FeatureDX10ClampAndIEEEMode])>; def FeatureISAVersion11_5_0 : FeatureSet< !listconcat(FeatureISAVersion11_5_Common.Features, diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index feb858e1b1e1d..e368b2d4cbecb 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -6236,14 +6236,14 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal, ValRange); } else if (ID == ".amdhsa_dx10_clamp") { - if ((IVersion.Major == 11 && IVersion.Minor >= 7) || IVersion.Major >= 12) + if (!AMDGPU::hasDX10ClampAndIEEEMode(getSTI())) return Error(IDRange.Start, "directive unsupported on gfx1170+", IDRange); PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal, ValRange); } else if (ID == ".amdhsa_ieee_mode") { - if ((IVersion.Major == 11 && IVersion.Minor >= 7) || IVersion.Major >= 12) + if (!AMDGPU::hasDX10ClampAndIEEEMode(getSTI())) return Error(IDRange.Start, "directive unsupported on gfx1170+", IDRange); PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 72eac4848cd89..d1817fcf02875 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -2415,13 +2415,13 @@ Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1( CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV); - if (!isGFX1170Plus()) + if (AMDGPU::hasDX10ClampAndIEEEMode(STI)) PRINT_DIRECTIVE(".amdhsa_dx10_clamp", COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP); CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE); - if (!isGFX1170Plus()) + if (AMDGPU::hasDX10ClampAndIEEEMode(STI)) PRINT_DIRECTIVE(".amdhsa_ieee_mode", COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE); diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td index 9949208fa8c90..b2755abd56fea 100644 --- a/llvm/lib/Target/AMDGPU/GCNProcessors.td +++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td @@ -9,11 +9,12 @@ // The code produced for "generic" is only useful for tests and cannot // reasonably be expected to execute on any particular target. def : ProcessorModel<"generic", NoSchedModel, - [] + [FeatureDX10ClampAndIEEEMode] >; def : ProcessorModel<"generic-hsa", NoSchedModel, - [FeatureFlatAddressSpace] + [FeatureFlatAddressSpace, + FeatureDX10ClampAndIEEEMode] >; //===------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 473994b9bcabb..20b4c89ce6e28 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -688,10 +688,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool hasSplitBarriers() const { return getGeneration() >= GFX12; } // \returns true if the target has DX10_CLAMP kernel descriptor mode bit - bool hasDX10ClampMode() const { return !isGFX1170Plus(); } + bool hasDX10ClampMode() const { return hasDX10ClampAndIEEEMode(); } // \returns true if the target has IEEE kernel descriptor mode bit - bool hasIEEEMode() const { return !isGFX1170Plus(); } + bool hasIEEEMode() const { return hasDX10ClampAndIEEEMode(); } // \returns true if the target has WG_RR_MODE kernel descriptor mode bit bool hasRrWGMode() const { return getGeneration() >= GFX12; } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.cpp index b35eaf4d8e2ff..38b09c9a89bf2 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.cpp @@ -40,7 +40,7 @@ MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCConstantExpr::create(amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE, Ctx), amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT, amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Ctx); - if (Version.Major < 11 || (Version.Major == 11 && Version.Minor < 7)) { + if (AMDGPU::hasDX10ClampAndIEEEMode(*STI)) { MCKernelDescriptor::bits_set( KD.compute_pgm_rsrc1, OneMCExpr, amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT, diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index b8639bbaf60e7..b4978c6ec81dd 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -559,7 +559,7 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT, amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ".amdhsa_float_denorm_mode_16_64"); - if (IVersion.Major < 11 || (IVersion.Major == 11 && IVersion.Minor < 7)) { + if (AMDGPU::hasDX10ClampAndIEEEMode(STI)) { PrintField(KD.compute_pgm_rsrc1, amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT, amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 802bf7dc25b03..7eb3639a7c54f 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -2610,6 +2610,10 @@ bool isGFX1170Plus(const MCSubtargetInfo &STI) { return isGFX1170(STI) || isGFX12Plus(STI); } +bool hasDX10ClampAndIEEEMode(const MCSubtargetInfo &STI) { + return STI.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode); +} + bool isGFX12(const MCSubtargetInfo &STI) { return STI.getFeatureBits()[AMDGPU::FeatureGFX12]; } diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index e2cf5e6ee2b44..b9e317c1c15e7 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -1728,6 +1728,7 @@ bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI); bool hasMAIInsts(const MCSubtargetInfo &STI); bool hasVOPD(const MCSubtargetInfo &STI); bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI); +bool hasDX10ClampAndIEEEMode(const MCSubtargetInfo &STI); inline bool supportsWave32(const MCSubtargetInfo &STI) { return AMDGPU::isGFX10Plus(STI) && !AMDGPU::isGFX1250(STI); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp index 1a0b0b0c48847..19e7c4aad92ef 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp @@ -383,12 +383,12 @@ void AMDGPUMCKernelCodeT::validate(const MCSubtargetInfo *STI, MCContext &Ctx) { if (!compute_pgm_resource1_registers->evaluateAsAbsolute(Value)) return; - if (G_00B848_DX10_CLAMP(Value) && AMDGPU::isGFX1170Plus(*STI)) { + if (G_00B848_DX10_CLAMP(Value) && !AMDGPU::hasDX10ClampAndIEEEMode(*STI)) { Ctx.reportError({}, "enable_dx10_clamp=1 is not allowed on GFX1170+"); return; } - if (G_00B848_IEEE_MODE(Value) && AMDGPU::isGFX1170Plus(*STI)) { + if (G_00B848_IEEE_MODE(Value) && !AMDGPU::hasDX10ClampAndIEEEMode(*STI)) { Ctx.reportError({}, "enable_ieee_mode=1 is not allowed on GFX1170+"); return; } From d57d290ceb87bac4b618b7c9376001342052d4c6 Mon Sep 17 00:00:00 2001 From: Mirko Brkusanin <[email protected]> Date: Thu, 19 Feb 2026 15:40:50 +0100 Subject: [PATCH 3/5] remove unused isGFX1170Plus helpers --- llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 4 ---- llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h | 1 - llvm/lib/Target/AMDGPU/GCNSubtarget.h | 2 -- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 4 ---- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 1 - 5 files changed, 12 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index d1817fcf02875..8ef44f9de844a 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -2257,10 +2257,6 @@ bool AMDGPUDisassembler::isGFX11Plus() const { bool AMDGPUDisassembler::isGFX1170() const { return AMDGPU::isGFX1170(STI); } -bool AMDGPUDisassembler::isGFX1170Plus() const { - return AMDGPU::isGFX1170Plus(STI); -} - bool AMDGPUDisassembler::isGFX12() const { return STI.hasFeature(AMDGPU::FeatureGFX12); } diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index 3cec0e86290e5..b01eb8dd59fad 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -180,7 +180,6 @@ class AMDGPUDisassembler : public MCDisassembler { bool isGFX11() const; bool isGFX1170() const; bool isGFX11Plus() const; - bool isGFX1170Plus() const; bool isGFX12() const; bool isGFX12Plus() const; bool isGFX1250() const; diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 20b4c89ce6e28..fc040367a538e 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -400,8 +400,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, return getGeneration() == GFX11 && hasWMMA128bInsts(); } - bool isGFX1170Plus() const { return getGeneration() >= GFX12 || isGFX1170(); } - bool hasMad64_32() const { return getGeneration() >= SEA_ISLANDS; } bool hasAtomicFaddInsts() const { diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 7eb3639a7c54f..294edd87b95d8 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -2606,10 +2606,6 @@ bool isGFX11Plus(const MCSubtargetInfo &STI) { return isGFX11(STI) || isGFX12Plus(STI); } -bool isGFX1170Plus(const MCSubtargetInfo &STI) { - return isGFX1170(STI) || isGFX12Plus(STI); -} - bool hasDX10ClampAndIEEEMode(const MCSubtargetInfo &STI) { return STI.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode); } diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index b9e317c1c15e7..c12cebd0c4f73 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -1707,7 +1707,6 @@ bool isGFX10Before1030(const MCSubtargetInfo &STI); bool isGFX11(const MCSubtargetInfo &STI); bool isGFX1170(const MCSubtargetInfo &STI); bool isGFX11Plus(const MCSubtargetInfo &STI); -bool isGFX1170Plus(const MCSubtargetInfo &STI); bool isGFX12(const MCSubtargetInfo &STI); bool isGFX12Plus(const MCSubtargetInfo &STI); bool isGFX1250(const MCSubtargetInfo &STI); From 00d210b8f70856ea0d0e25a4e64b2369ac3e3679 Mon Sep 17 00:00:00 2001 From: Mirko Brkusanin <[email protected]> Date: Mon, 23 Feb 2026 15:42:01 +0100 Subject: [PATCH 4/5] remove from generic processors; use hasFeature; update tests; eliminate helpers; merge into one check --- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 2 +- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 4 ++-- llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 4 ++-- llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 4 ++-- .../Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 4 ++-- llvm/lib/Target/AMDGPU/GCNProcessors.td | 5 ++--- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 6 ------ .../AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.cpp | 2 +- .../AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp | 3 ++- llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.cpp | 4 +--- llvm/lib/Target/AMDGPU/SIProgramInfo.cpp | 10 ++++------ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 4 ---- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 1 - llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp | 8 ++++++-- llvm/test/CodeGen/AMDGPU/amdpal-callable.ll | 2 +- llvm/test/CodeGen/AMDGPU/amdpal-msgpack-default.ll | 2 +- llvm/test/CodeGen/AMDGPU/amdpal-msgpack-denormal.ll | 2 +- .../CodeGen/AMDGPU/amdpal-msgpack-dx10-clamp-on.ll | 2 +- llvm/test/CodeGen/AMDGPU/amdpal-msgpack-dx10-clamp.ll | 2 +- llvm/test/CodeGen/AMDGPU/amdpal-msgpack-ieee.ll | 2 +- llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll | 2 +- llvm/test/CodeGen/AMDGPU/omod-nsz-flag.mir | 2 +- 22 files changed, 33 insertions(+), 44 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 7d2df427ddd60..72f29ed391ac2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -1448,7 +1448,7 @@ static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD, const SIProgramInfo &CurrentProgramInfo, CallingConv::ID CC, const GCNSubtarget &ST, unsigned DynamicVGPRBlockSize) { - if (ST.hasIEEEMode()) + if (ST.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode)) MD->setHwStage(CC, ".ieee_mode", (bool)CurrentProgramInfo.IEEEMode); MD->setHwStage(CC, ".wgp_mode", (bool)CurrentProgramInfo.WgpMode); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 52dbd16f80f41..acac091528415 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -2125,10 +2125,10 @@ bool GCNTargetMachine::parseMachineFunctionInfo( MFI->NumUserSGPRs += YamlMFI.NumKernargPreloadSGPRs; } - if (ST.hasIEEEMode()) + if (ST.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode)) { MFI->Mode.IEEE = YamlMFI.Mode.IEEE; - if (ST.hasDX10ClampMode()) MFI->Mode.DX10Clamp = YamlMFI.Mode.DX10Clamp; + } // FIXME: Move proper support for denormal-fp-math into base MachineFunction MFI->Mode.FP32Denormals.Input = YamlMFI.Mode.FP32InputDenormals diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index d4a6838ae4896..2db1c2325c05e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -1631,8 +1631,8 @@ void GCNTTIImpl::collectKernelLaunchBounds( GCNTTIImpl::KnownIEEEMode GCNTTIImpl::fpenvIEEEMode(const Instruction &I) const { - if (!ST->hasIEEEMode()) // Only mode on gfx12 - return KnownIEEEMode::On; + if (!ST->hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode)) + return KnownIEEEMode::On; // Only mode on gfx1170+ const Function *F = I.getFunction(); if (!F) diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index e368b2d4cbecb..e74fdf9211eaf 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -6236,14 +6236,14 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal, ValRange); } else if (ID == ".amdhsa_dx10_clamp") { - if (!AMDGPU::hasDX10ClampAndIEEEMode(getSTI())) + if (!getSTI().hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode)) return Error(IDRange.Start, "directive unsupported on gfx1170+", IDRange); PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal, ValRange); } else if (ID == ".amdhsa_ieee_mode") { - if (!AMDGPU::hasDX10ClampAndIEEEMode(getSTI())) + if (!getSTI().hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode)) return Error(IDRange.Start, "directive unsupported on gfx1170+", IDRange); PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 8ef44f9de844a..5b260ac12c7a5 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -2411,13 +2411,13 @@ Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1( CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV); - if (AMDGPU::hasDX10ClampAndIEEEMode(STI)) + if (STI.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode)) PRINT_DIRECTIVE(".amdhsa_dx10_clamp", COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP); CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE); - if (AMDGPU::hasDX10ClampAndIEEEMode(STI)) + if (STI.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode)) PRINT_DIRECTIVE(".amdhsa_ieee_mode", COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE); diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td index b2755abd56fea..9949208fa8c90 100644 --- a/llvm/lib/Target/AMDGPU/GCNProcessors.td +++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td @@ -9,12 +9,11 @@ // The code produced for "generic" is only useful for tests and cannot // reasonably be expected to execute on any particular target. def : ProcessorModel<"generic", NoSchedModel, - [FeatureDX10ClampAndIEEEMode] + [] >; def : ProcessorModel<"generic-hsa", NoSchedModel, - [FeatureFlatAddressSpace, - FeatureDX10ClampAndIEEEMode] + [FeatureFlatAddressSpace] >; //===------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index fc040367a538e..52ff11ee301de 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -685,12 +685,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, // \returns true if the target has split barriers feature bool hasSplitBarriers() const { return getGeneration() >= GFX12; } - // \returns true if the target has DX10_CLAMP kernel descriptor mode bit - bool hasDX10ClampMode() const { return hasDX10ClampAndIEEEMode(); } - - // \returns true if the target has IEEE kernel descriptor mode bit - bool hasIEEEMode() const { return hasDX10ClampAndIEEEMode(); } - // \returns true if the target has WG_RR_MODE kernel descriptor mode bit bool hasRrWGMode() const { return getGeneration() >= GFX12; } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.cpp index 38b09c9a89bf2..296b248dfa23a 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.cpp @@ -40,7 +40,7 @@ MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCConstantExpr::create(amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE, Ctx), amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT, amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Ctx); - if (AMDGPU::hasDX10ClampAndIEEEMode(*STI)) { + if (STI->hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode)) { MCKernelDescriptor::bits_set( KD.compute_pgm_rsrc1, OneMCExpr, amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT, diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index b4978c6ec81dd..26c7710a5bcda 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -13,6 +13,7 @@ #include "AMDGPUTargetStreamer.h" #include "AMDGPUMCExpr.h" #include "AMDGPUMCKernelDescriptor.h" +#include "AMDGPUMCTargetDesc.h" #include "AMDGPUPTNote.h" #include "Utils/AMDGPUBaseInfo.h" #include "Utils/AMDKernelCodeTUtils.h" @@ -559,7 +560,7 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT, amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ".amdhsa_float_denorm_mode_16_64"); - if (AMDGPU::hasDX10ClampAndIEEEMode(STI)) { + if (STI.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode)) { PrintField(KD.compute_pgm_rsrc1, amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT, amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, diff --git a/llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.cpp b/llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.cpp index 9a58382e13c6e..f9313ff96c988 100644 --- a/llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.cpp +++ b/llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.cpp @@ -15,13 +15,11 @@ SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F, const GCNSubtarget &ST) { *this = getDefaultForCallingConv(F.getCallingConv()); - if (ST.hasIEEEMode()) { + if (ST.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode)) { StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString(); if (!IEEEAttr.empty()) IEEE = IEEEAttr == "true"; - } - if (ST.hasDX10ClampMode()) { StringRef DX10ClampAttr = F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString(); if (!DX10ClampAttr.empty()) diff --git a/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp b/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp index 93ba0a337d7dd..a3f261b87e80b 100644 --- a/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp @@ -89,11 +89,10 @@ static uint64_t getComputePGMRSrc1Reg(const SIProgramInfo &ProgInfo, S_00B848_MEM_ORDERED(ProgInfo.MemOrdered) | S_00B848_FWD_PROGRESS(ProgInfo.FwdProgress); - if (ST.hasDX10ClampMode()) + if (ST.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode)) { Reg |= S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp); - - if (ST.hasIEEEMode()) Reg |= S_00B848_IEEE_MODE(ProgInfo.IEEEMode); + } if (ST.hasRrWGMode()) Reg |= S_00B848_RR_WG_MODE(ProgInfo.RrWgMode); @@ -108,11 +107,10 @@ static uint64_t getPGMRSrc1Reg(const SIProgramInfo &ProgInfo, S_00B848_PRIV(ProgInfo.Priv) | S_00B848_DEBUG_MODE(ProgInfo.DebugMode); - if (ST.hasDX10ClampMode()) + if (ST.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode)) { Reg |= S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp); - - if (ST.hasIEEEMode()) Reg |= S_00B848_IEEE_MODE(ProgInfo.IEEEMode); + } if (ST.hasRrWGMode()) Reg |= S_00B848_RR_WG_MODE(ProgInfo.RrWgMode); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 294edd87b95d8..8ad424a255fc1 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -2606,10 +2606,6 @@ bool isGFX11Plus(const MCSubtargetInfo &STI) { return isGFX11(STI) || isGFX12Plus(STI); } -bool hasDX10ClampAndIEEEMode(const MCSubtargetInfo &STI) { - return STI.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode); -} - bool isGFX12(const MCSubtargetInfo &STI) { return STI.getFeatureBits()[AMDGPU::FeatureGFX12]; } diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index c12cebd0c4f73..fa24383c90fa6 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -1727,7 +1727,6 @@ bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI); bool hasMAIInsts(const MCSubtargetInfo &STI); bool hasVOPD(const MCSubtargetInfo &STI); bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI); -bool hasDX10ClampAndIEEEMode(const MCSubtargetInfo &STI); inline bool supportsWave32(const MCSubtargetInfo &STI) { return AMDGPU::isGFX10Plus(STI) && !AMDGPU::isGFX1250(STI); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp index 19e7c4aad92ef..e1db8e43a3de9 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp @@ -12,6 +12,7 @@ #include "AMDKernelCodeTUtils.h" #include "AMDKernelCodeT.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIDefines.h" #include "Utils/AMDGPUBaseInfo.h" #include "Utils/SIDefinesUtils.h" @@ -21,6 +22,7 @@ #include "llvm/MC/MCParser/AsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" @@ -383,12 +385,14 @@ void AMDGPUMCKernelCodeT::validate(const MCSubtargetInfo *STI, MCContext &Ctx) { if (!compute_pgm_resource1_registers->evaluateAsAbsolute(Value)) return; - if (G_00B848_DX10_CLAMP(Value) && !AMDGPU::hasDX10ClampAndIEEEMode(*STI)) { + if (G_00B848_DX10_CLAMP(Value) && + !STI->hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode)) { Ctx.reportError({}, "enable_dx10_clamp=1 is not allowed on GFX1170+"); return; } - if (G_00B848_IEEE_MODE(Value) && !AMDGPU::hasDX10ClampAndIEEEMode(*STI)) { + if (G_00B848_IEEE_MODE(Value) && + !STI->hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode)) { Ctx.reportError({}, "enable_ieee_mode=1 is not allowed on GFX1170+"); return; } diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll index 5f98000d19e4d..ffac4b8b4c944 100644 --- a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll +++ b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdpal -mattr=-xnack < %s | FileCheck -check-prefixes=GCN,SDAG,GFX8 -enable-var-scope %s +; RUN: llc -mtriple=amdgcn--amdpal -mattr=-xnack -mattr=+dx10-clamp-and-ieee-mode < %s | FileCheck -check-prefixes=GCN,SDAG,GFX8 -enable-var-scope %s ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -mattr=-xnack < %s | FileCheck -check-prefixes=GCN,SDAG,GFX9 -enable-var-scope %s ; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mattr=-xnack -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GISEL,GFX9 -enable-var-scope %s diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-default.ll b/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-default.ll index 154e1e0c02c7b..ab99d84e405d5 100644 --- a/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-default.ll +++ b/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-default.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdpal < %s | FileCheck -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdpal -mattr=+dx10-clamp-and-ieee-mode < %s | FileCheck -check-prefix=SI %s ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga < %s | FileCheck -check-prefix=VI %s ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 -enable-var-scope %s diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-denormal.ll b/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-denormal.ll index b77a3a4c3f504..f99f876391082 100644 --- a/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-denormal.ll +++ b/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-denormal.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdpal < %s | FileCheck -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdpal -mattr=+dx10-clamp-and-ieee-mode < %s | FileCheck -check-prefix=SI %s ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga < %s | FileCheck -check-prefix=VI %s ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 -enable-var-scope %s diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-dx10-clamp-on.ll b/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-dx10-clamp-on.ll index 5aa4a127ada39..cb67cb8323107 100644 --- a/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-dx10-clamp-on.ll +++ b/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-dx10-clamp-on.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdpal < %s | FileCheck -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdpal -mattr=+dx10-clamp-and-ieee-mode < %s | FileCheck -check-prefix=SI %s ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga < %s | FileCheck -check-prefix=VI %s ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 -enable-var-scope %s ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1170 < %s | FileCheck -check-prefix=GFX1170 -enable-var-scope %s diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-dx10-clamp.ll b/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-dx10-clamp.ll index 24e52594176b3..d6967e837d55d 100644 --- a/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-dx10-clamp.ll +++ b/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-dx10-clamp.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdpal < %s | FileCheck -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdpal -mattr=+dx10-clamp-and-ieee-mode < %s | FileCheck -check-prefix=SI %s ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga < %s | FileCheck -check-prefix=VI %s ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 -enable-var-scope %s ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1170 < %s | FileCheck -check-prefix=GFX1170 -enable-var-scope %s diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-ieee.ll b/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-ieee.ll index 1dbee82cd407d..a352478f4b8c4 100644 --- a/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-ieee.ll +++ b/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-ieee.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdpal < %s | FileCheck -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdpal -mattr=+dx10-clamp-and-ieee-mode < %s | FileCheck -check-prefix=SI %s ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga < %s | FileCheck -check-prefix=VI %s ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 -enable-var-scope %s ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1170 < %s | FileCheck -check-prefix=GFX1170 -enable-var-scope %s diff --git a/llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll b/llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll index 7956670a16530..732051972c14a 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn--amdhsa -mattr=+dx10-clamp-and-ieee-mode < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}test_default_ci: ; GCN: .amdhsa_dx10_clamp 1 diff --git a/llvm/test/CodeGen/AMDGPU/omod-nsz-flag.mir b/llvm/test/CodeGen/AMDGPU/omod-nsz-flag.mir index 91950bffd9df4..1bcddf7d35946 100644 --- a/llvm/test/CodeGen/AMDGPU/omod-nsz-flag.mir +++ b/llvm/test/CodeGen/AMDGPU/omod-nsz-flag.mir @@ -1,4 +1,4 @@ -# RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass si-fold-operands %s -o - | FileCheck -check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass si-fold-operands -mattr=+dx10-clamp-and-ieee-mode %s -o - | FileCheck -check-prefix=GCN %s --- From 539f3cb55f4b2a931a9b471861b99a5d77fa873b Mon Sep 17 00:00:00 2001 From: Mirko Brkusanin <[email protected]> Date: Mon, 23 Feb 2026 16:31:25 +0100 Subject: [PATCH 5/5] update test --- clang/test/CodeGenOpenCL/amdgpu-ieee.cl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/clang/test/CodeGenOpenCL/amdgpu-ieee.cl b/clang/test/CodeGenOpenCL/amdgpu-ieee.cl index 0a7b0d4f494ef..a29ab47066958 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-ieee.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-ieee.cl @@ -1,19 +1,24 @@ // REQUIRES: amdgpu-registered-target // // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \ +// RUN: -target-feature +dx10-clamp-and-ieee-mode \ // RUN: | FileCheck -check-prefixes=COMMON,ON %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \ +// RUN: -target-feature +dx10-clamp-and-ieee-mode \ // RUN: -mno-amdgpu-ieee -menable-no-nans \ // RUN: | FileCheck -check-prefixes=COMMON,OFF %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \ +// RUN: -target-feature +dx10-clamp-and-ieee-mode \ // RUN: -mno-amdgpu-ieee -cl-fast-relaxed-math \ // RUN: | FileCheck -check-prefixes=COMMON,OFF %s // Check AMDGCN ISA generation. // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O3 -S -o - %s \ +// RUN: -target-feature +dx10-clamp-and-ieee-mode \ // RUN: | FileCheck -check-prefixes=ISA-ON %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O3 -S -o - %s \ +// RUN: -target-feature +dx10-clamp-and-ieee-mode \ // RUN: -mno-amdgpu-ieee -menable-no-nans \ // RUN: | FileCheck -check-prefixes=ISA-OFF %s _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
