Author: Fabian Ritter Date: 2025-02-19T10:20:48+01:00 New Revision: 8615f9aaffd4337a33ea979f010c4d6410ba6125
URL: https://github.com/llvm/llvm-project/commit/8615f9aaffd4337a33ea979f010c4d6410ba6125 DIFF: https://github.com/llvm/llvm-project/commit/8615f9aaffd4337a33ea979f010c4d6410ba6125.diff LOG: [AMDGPU] Replace gfx940 and gfx941 with gfx942 in llvm (#126763) gfx940 and gfx941 are no longer supported. This is one of a series of PRs to remove them from the code base. This PR removes all non-documentation occurrences of gfx940/gfx941 from the llvm directory, and the remaining occurrences in clang. Documentation changes will follow. For SWDEV-512631 Added: Modified: clang/test/Misc/target-invalid-cpu-note/amdgcn.c llvm/docs/AMDGPUUsage.rst llvm/include/llvm/BinaryFormat/ELF.h llvm/include/llvm/IR/IntrinsicsAMDGPU.td llvm/include/llvm/TargetParser/TargetParser.h llvm/lib/Object/ELFObjectFile.cpp llvm/lib/ObjectYAML/ELFYAML.cpp llvm/lib/Target/AMDGPU/AMDGPU.td llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp llvm/lib/Target/AMDGPU/DSInstructions.td llvm/lib/Target/AMDGPU/FLATInstructions.td llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp llvm/lib/Target/AMDGPU/GCNProcessors.td llvm/lib/Target/AMDGPU/GCNSubtarget.h llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp llvm/lib/Target/AMDGPU/SIDefines.h llvm/lib/Target/AMDGPU/SIISelLowering.cpp llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp llvm/lib/Target/AMDGPU/SISchedule.td llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp llvm/lib/TargetParser/TargetParser.cpp llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-debug-info.ll llvm/tools/llvm-readobj/ELFDumper.cpp Removed: ################################################################################ diff --git a/clang/test/Misc/target-invalid-cpu-note/amdgcn.c b/clang/test/Misc/target-invalid-cpu-note/amdgcn.c index 642d2df211c21..9ef44b2bb403e 100644 --- a/clang/test/Misc/target-invalid-cpu-note/amdgcn.c +++ b/clang/test/Misc/target-invalid-cpu-note/amdgcn.c @@ -45,8 +45,6 @@ // CHECK-SAME: {{^}}, gfx909 // CHECK-SAME: {{^}}, gfx90a // CHECK-SAME: {{^}}, gfx90c -// 
CHECK-SAME: {{^}}, gfx940 -// CHECK-SAME: {{^}}, gfx941 // CHECK-SAME: {{^}}, gfx942 // CHECK-SAME: {{^}}, gfx950 // CHECK-SAME: {{^}}, gfx1010 diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 5966d1617feee..936e8e2960bf1 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -2232,7 +2232,7 @@ The AMDGPU backend uses the following ELF header: ``EF_AMDGPU_MACH_AMDGCN_GFX1035`` 0x03d ``gfx1035`` ``EF_AMDGPU_MACH_AMDGCN_GFX1034`` 0x03e ``gfx1034`` ``EF_AMDGPU_MACH_AMDGCN_GFX90A`` 0x03f ``gfx90a`` - ``EF_AMDGPU_MACH_AMDGCN_GFX940`` 0x040 ``gfx940`` + *reserved* 0x040 Reserved. ``EF_AMDGPU_MACH_AMDGCN_GFX1100`` 0x041 ``gfx1100`` ``EF_AMDGPU_MACH_AMDGCN_GFX1013`` 0x042 ``gfx1013`` ``EF_AMDGPU_MACH_AMDGCN_GFX1150`` 0x043 ``gfx1150`` @@ -2243,7 +2243,7 @@ The AMDGPU backend uses the following ELF header: ``EF_AMDGPU_MACH_AMDGCN_GFX1200`` 0x048 ``gfx1200`` *reserved* 0x049 Reserved. ``EF_AMDGPU_MACH_AMDGCN_GFX1151`` 0x04a ``gfx1151`` - ``EF_AMDGPU_MACH_AMDGCN_GFX941`` 0x04b ``gfx941`` + *reserved* 0x04b Reserved. ``EF_AMDGPU_MACH_AMDGCN_GFX942`` 0x04c ``gfx942`` *reserved* 0x04d Reserved. 
``EF_AMDGPU_MACH_AMDGCN_GFX1201`` 0x04e ``gfx1201`` diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index 64f643749d6ac..37eab89e706db 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -814,7 +814,7 @@ enum : unsigned { EF_AMDGPU_MACH_AMDGCN_GFX1035 = 0x03d, EF_AMDGPU_MACH_AMDGCN_GFX1034 = 0x03e, EF_AMDGPU_MACH_AMDGCN_GFX90A = 0x03f, - EF_AMDGPU_MACH_AMDGCN_GFX940 = 0x040, + EF_AMDGPU_MACH_AMDGCN_RESERVED_0X40 = 0x040, EF_AMDGPU_MACH_AMDGCN_GFX1100 = 0x041, EF_AMDGPU_MACH_AMDGCN_GFX1013 = 0x042, EF_AMDGPU_MACH_AMDGCN_GFX1150 = 0x043, @@ -825,7 +825,7 @@ enum : unsigned { EF_AMDGPU_MACH_AMDGCN_GFX1200 = 0x048, EF_AMDGPU_MACH_AMDGCN_RESERVED_0X49 = 0x049, EF_AMDGPU_MACH_AMDGCN_GFX1151 = 0x04a, - EF_AMDGPU_MACH_AMDGCN_GFX941 = 0x04b, + EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4B = 0x04b, EF_AMDGPU_MACH_AMDGCN_GFX942 = 0x04c, EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4D = 0x04d, EF_AMDGPU_MACH_AMDGCN_GFX1201 = 0x04e, diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 9558f2b9b74e0..1e4f25c642493 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -1074,7 +1074,7 @@ class AMDGPUImageDimIntrinsic<AMDGPUDimProfile P_, // bit 0 = glc, bit 1 = slc, // bit 2 = dlc (gfx10/gfx11), // bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope !listconcat(props, [IntrNoCallback, IntrNoFree, IntrWillReturn], !if(P_.IsAtomic, [], [ImmArg<ArgIndex<AMDGPUImageDimIntrinsicEval<P_>.DmaskArgIndex>>]), @@ -1321,7 +1321,7 @@ def int_amdgcn_s_buffer_load : DefaultAttrsIntrinsic < llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = 
nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // Note: volatile bit is **not** permitted here. @@ -1351,7 +1351,7 @@ class AMDGPURawBufferLoad : DefaultAttrsIntrinsic < llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1381,7 +1381,7 @@ class AMDGPURawPtrBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntri llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1413,7 +1413,7 @@ class AMDGPUStructBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntri llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1431,7 +1431,7 @@ class AMDGPUStructAtomicBufferLoad<LLVMType data_ty = llvm_any_ty> : Intrinsic < llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: 
volatile op (bit 31, stripped at lowering) @@ -1448,7 +1448,7 @@ class AMDGPUStructPtrBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIn llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1467,7 +1467,7 @@ class AMDGPUStructPtrAtomicBufferLoad<LLVMType data_ty = llvm_any_ty> : Intrinsi llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1485,7 +1485,7 @@ class AMDGPURawBufferStore<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrins llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1503,7 +1503,7 @@ class AMDGPURawPtrBufferStore<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntr llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1523,7 +1523,7 @@ class 
AMDGPUStructBufferStore<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntr llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1542,7 +1542,7 @@ class AMDGPUStructPtrBufferStore<LLVMType data_ty = llvm_any_ty> : DefaultAttrsI llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1628,7 +1628,7 @@ def int_amdgcn_raw_ptr_buffer_atomic_cmpswap : Intrinsic< // gfx908 intrinsic def int_amdgcn_raw_buffer_atomic_fadd : AMDGPURawBufferAtomic<llvm_anyfloat_ty>; -// Supports float and <2 x half> on gfx908. Supports v2bf16 on gfx90a, gfx940, gfx950, gfx12+. +// Supports float and <2 x half> on gfx908. Supports v2bf16 on gfx90a, gfx942, gfx950, gfx12+. 
def int_amdgcn_raw_ptr_buffer_atomic_fadd : AMDGPURawPtrBufferAtomic<llvm_anyfloat_ty>; class AMDGPUStructBufferAtomic<LLVMType data_ty = llvm_any_ty> : Intrinsic < @@ -1727,7 +1727,7 @@ def int_amdgcn_raw_tbuffer_load : DefaultAttrsIntrinsic < llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz [IntrReadMem, @@ -1743,7 +1743,7 @@ def int_amdgcn_raw_ptr_tbuffer_load : DefaultAttrsIntrinsic < llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1761,7 +1761,7 @@ def int_amdgcn_raw_tbuffer_store : DefaultAttrsIntrinsic < llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1779,7 +1779,7 @@ def int_amdgcn_raw_ptr_tbuffer_store : DefaultAttrsIntrinsic < llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1797,7 +1797,7 @@ def 
int_amdgcn_struct_tbuffer_load : DefaultAttrsIntrinsic < llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1815,7 +1815,7 @@ def int_amdgcn_struct_ptr_tbuffer_load : DefaultAttrsIntrinsic < llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1834,7 +1834,7 @@ def int_amdgcn_struct_ptr_tbuffer_store : DefaultAttrsIntrinsic < llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1853,7 +1853,7 @@ def int_amdgcn_struct_tbuffer_store : DefaultAttrsIntrinsic < llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1872,7 +1872,7 @@ class AMDGPURawBufferLoadLDS : Intrinsic < llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // 
bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1891,7 +1891,7 @@ class AMDGPURawPtrBufferLoadLDS : Intrinsic < llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1914,7 +1914,7 @@ class AMDGPUStructBufferLoadLDS : Intrinsic < llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1934,7 +1934,7 @@ class AMDGPUStructPtrBufferLoadLDS : Intrinsic < llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -3007,7 +3007,7 @@ def int_amdgcn_fdot2_f32_bf16 : // f32 %r = llvm.amdgcn.fdot2c.f32.bf16(v2bf16 %a, v2bf16 %b, f32 %c, i1 %clamp) // %r = %a[0] * %b[0] + %a[1] * %b[1] + c // TODO: This actually is similar to llvm.amdgcn.fdot2 intrinsics which produces -// v_dot2c_f32_f16 on gfx940. Maybe we can consolidate these. +// v_dot2c_f32_f16 on gfx942. Maybe we can consolidate these. 
def int_amdgcn_fdot2c_f32_bf16 : ClangBuiltin<"__builtin_amdgcn_fdot2c_f32_bf16">, @@ -3250,7 +3250,7 @@ def int_amdgcn_mfma_f32_4x4x4bf16_1k : AMDGPUMfmaIntrinsic<llvm_v4f32_ty, ll def int_amdgcn_mfma_f32_32x32x8bf16_1k : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v4i16_ty>; def int_amdgcn_mfma_f32_16x16x16bf16_1k : AMDGPUMfmaIntrinsic<llvm_v4f32_ty, llvm_v4i16_ty>; -// Note: in gfx940 BLGP argument is replaced by NEG bitfield in the DGEMM MFMA. +// Note: in gfx942 BLGP argument is replaced by NEG bitfield in the DGEMM MFMA. // Three bits corresponding to the neg modifier applied to the respective // source operand. def int_amdgcn_mfma_f64_16x16x4f64 : AMDGPUMfmaIntrinsic<llvm_v4f64_ty, llvm_double_ty>; @@ -3258,7 +3258,7 @@ def int_amdgcn_mfma_f64_4x4x4f64 : AMDGPUMfmaIntrinsic<llvm_double_ty, ll } //===----------------------------------------------------------------------===// -// gfx940 intrinsics +// gfx942 intrinsics // ===----------------------------------------------------------------------===// class AMDGPUMFp8MfmaIntrinsic<LLVMType DestTy> : diff --git a/llvm/include/llvm/TargetParser/TargetParser.h b/llvm/include/llvm/TargetParser/TargetParser.h index 55e7b417428c4..f776b41f3d7ca 100644 --- a/llvm/include/llvm/TargetParser/TargetParser.h +++ b/llvm/include/llvm/TargetParser/TargetParser.h @@ -83,8 +83,6 @@ enum GPUKind : uint32_t { GK_GFX909 = 65, GK_GFX90A = 66, GK_GFX90C = 67, - GK_GFX940 = 68, - GK_GFX941 = 69, GK_GFX942 = 70, GK_GFX950 = 71, diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp index 2d3d70db50c39..ac25d76709726 100644 --- a/llvm/lib/Object/ELFObjectFile.cpp +++ b/llvm/lib/Object/ELFObjectFile.cpp @@ -545,10 +545,6 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const { return "gfx90a"; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C: return "gfx90c"; - case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940: - return "gfx940"; - case ELF::EF_AMDGPU_MACH_AMDGCN_GFX941: - return "gfx941"; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942: 
return "gfx942"; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX950: diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index 05e4d85b2ea5d..1f970739c1e7e 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -609,8 +609,6 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO, BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX909, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX90C, EF_AMDGPU_MACH); - BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX940, EF_AMDGPU_MACH); - BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX941, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX942, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX950, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1010, EF_AMDGPU_MACH); diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 6439149d801f6..3aabca49b249e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1619,28 +1619,6 @@ def FeatureISAVersion9_5_Common : FeatureSet< FeatureAtomicBufferPkAddBF16Inst ])>; -def FeatureISAVersion9_4_0 : FeatureSet< - !listconcat(FeatureISAVersion9_4_Common.Features, - [ - FeatureAddressableLocalMemorySize65536, - FeatureForceStoreSC0SC1, - FeatureFP8Insts, - FeatureFP8ConversionInsts, - FeatureCvtFP8VOP1Bug, - FeatureXF32Insts - ])>; - -def FeatureISAVersion9_4_1 : FeatureSet< - !listconcat(FeatureISAVersion9_4_Common.Features, - [ - FeatureAddressableLocalMemorySize65536, - FeatureForceStoreSC0SC1, - FeatureFP8Insts, - FeatureFP8ConversionInsts, - FeatureCvtFP8VOP1Bug, - FeatureXF32Insts - ])>; - def FeatureISAVersion9_4_2 : FeatureSet< !listconcat(FeatureISAVersion9_4_Common.Features, [ diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 3bbbbcf71d8ae..cf3843869808b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ 
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -4295,7 +4295,7 @@ AMDGPUInstructionSelector::selectVOP3PModsImpl( // TODO: Handle G_FSUB 0 as fneg // TODO: Match op_sel through g_build_vector_trunc and g_shuffle_vector. - (void)IsDOT; // DOTs do not use OPSEL on gfx940+, check ST.hasDOTOpSelHazard() + (void)IsDOT; // DOTs do not use OPSEL on gfx942+, check ST.hasDOTOpSelHazard() // Packed instructions do not have abs modifiers. Mods |= SISrcMods::OP_SEL_1; diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td index 9ca853befba73..d3487daee364f 100644 --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -1773,7 +1773,7 @@ def DS_READ_B128_vi : DS_Real_vi<0xff, DS_READ_B128>; def DS_ADD_F64_vi : DS_Real_vi<0x5c, DS_ADD_F64>; def DS_ADD_RTN_F64_vi : DS_Real_vi<0x7c, DS_ADD_RTN_F64>; -// GFX940+. +// GFX942+. def DS_PK_ADD_F16_vi : DS_Real_vi<0x17, DS_PK_ADD_F16>; def DS_PK_ADD_RTN_F16_vi : DS_Real_vi<0xb7, DS_PK_ADD_RTN_F16>; def DS_PK_ADD_BF16_vi : DS_Real_vi<0x18, DS_PK_ADD_BF16>; diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index ea6e703eba5d9..7988a9ac0ce55 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -814,7 +814,7 @@ defm FLAT_ATOMIC_FMAX : FLAT_Atomic_Pseudo <"flat_atomic_fmax", } // End SubtargetPredicate = isGFX7GFX10GFX11 -// GFX940-, GFX11-only flat instructions. +// GFX942-, GFX11-only flat instructions. 
let SubtargetPredicate = HasFlatAtomicFaddF32Inst in { defm FLAT_ATOMIC_ADD_F32 : FLAT_Atomic_Pseudo<"flat_atomic_add_f32", VGPR_32, f32>; } // End SubtargetPredicate = HasFlatAtomicFaddF32Inst @@ -2076,7 +2076,7 @@ defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_SVE_vi <0x1e>; defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_SVE_vi <0x1f>; let SubtargetPredicate = isGFX8GFX9NotGFX940 in { - // These instructions are encoded differently on gfx90* and gfx940. + // These instructions are encoded differently on gfx90* and gfx94*. defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_vi <0x04d, 0>; defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x04e, 0>; } diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 827598078af53..1ff75095b220a 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -2292,7 +2292,7 @@ GFX940_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates(int NumPasses) { static int GFX940_XDL_N_PassWritesVGPROverlappedSrcABWaitStates(int NumPasses, bool IsGFX950) { - // xdl def cycles | gfx940 | gfx950 + // xdl def cycles | gfx942 | gfx950 // 2 pass | 5 5 // 4 pass | 7 8 // 8 pass | 11 12 @@ -2600,7 +2600,7 @@ static int GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates(int NumPasses) { static int GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(int NumPasses, bool IsGFX950) { - // xdl def cycles | gfx940 | gfx950 + // xdl def cycles | gfx942 | gfx950 // 2 pass | 5 5 // 4 pass | 7 8 // 8 pass | 11 12 @@ -2610,7 +2610,7 @@ static int GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(int NumPasses, static int GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses, bool IsGFX950) { - // xdl def cycles | gfx940 | gfx950 + // xdl def cycles | gfx942 | gfx950 // 2 pass | 5 5 // 4 pass | 7 8 // 8 pass | 11 12 diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td index a86c76bb6075e..0b372e29efe67 
100644 --- a/llvm/lib/Target/AMDGPU/GCNProcessors.td +++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td @@ -192,15 +192,7 @@ def : ProcessorModel<"gfx90c", SIQuarterSpeedModel, FeatureISAVersion9_0_C.Features >; -def : ProcessorModel<"gfx940", SIDPGFX940FullSpeedModel, - FeatureISAVersion9_4_0.Features ->; - -def : ProcessorModel<"gfx941", SIDPGFX940FullSpeedModel, - FeatureISAVersion9_4_1.Features ->; - -def : ProcessorModel<"gfx942", SIDPGFX940FullSpeedModel, +def : ProcessorModel<"gfx942", SIDPGFX942FullSpeedModel, FeatureISAVersion9_4_2.Features >; @@ -213,8 +205,8 @@ def : ProcessorModel<"gfx9-generic", SIQuarterSpeedModel, FeatureISAVersion9_Generic.Features >; -// [gfx940, gfx941, gfx942] -def : ProcessorModel<"gfx9-4-generic", SIDPGFX940FullSpeedModel, +// [gfx942] +def : ProcessorModel<"gfx9-4-generic", SIDPGFX942FullSpeedModel, FeatureISAVersion9_4_Generic.Features >; diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 342b211199dca..f7c5c472c93a5 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -1297,11 +1297,11 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool hasPackedTID() const { return HasPackedTID; } - // GFX940 is a derivation to GFX90A. hasGFX940Insts() being true implies that + // GFX94* is a derivation to GFX90A. hasGFX940Insts() being true implies that // hasGFX90AInsts is also true. bool hasGFX940Insts() const { return GFX940Insts; } - // GFX950 is a derivation to GFX940. hasGFX950Insts() implies that + // GFX950 is a derivation to GFX94*. hasGFX950Insts() implies that // hasGFX940Insts and hasGFX90AInsts are also true. 
bool hasGFX950Insts() const { return GFX950Insts; } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 059bab5838526..4a4ad712e304d 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -93,8 +93,6 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) { case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A: AK = GK_GFX90A; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C: AK = GK_GFX90C; break; - case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940: AK = GK_GFX940; break; - case ELF::EF_AMDGPU_MACH_AMDGCN_GFX941: AK = GK_GFX941; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942: AK = GK_GFX942; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX950: AK = GK_GFX950; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break; @@ -180,8 +178,6 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) { case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909; case GK_GFX90A: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A; case GK_GFX90C: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C; - case GK_GFX940: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX940; - case GK_GFX941: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX941; case GK_GFX942: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX942; case GK_GFX950: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX950; case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010; diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h index f812ae652b63d..721601efcc804 100644 --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -542,7 +542,7 @@ enum Id { // HwRegCode, (6) [5:0] ID_EXCP_FLAG_USER = 18, ID_TRAP_CTRL = 19, - // GFX940 specific registers + // GFX94* specific registers ID_XCC_ID = 20, ID_SQ_PERF_SNAPSHOT_DATA = 21, ID_SQ_PERF_SNAPSHOT_DATA1 = 22, diff --git 
a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index e09b310d107ac..909ad07782fc6 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -16823,39 +16823,39 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { // safe. The message phrasing also should be better. if (globalMemoryFPAtomicIsLegal(*Subtarget, RMW, HasSystemScope)) { if (AS == AMDGPUAS::FLAT_ADDRESS) { - // gfx940, gfx12 + // gfx942, gfx12 if (Subtarget->hasAtomicFlatPkAdd16Insts() && isV2F16OrV2BF16(Ty)) return ReportUnsafeHWInst(AtomicExpansionKind::None); } else if (AMDGPU::isExtendedGlobalAddrSpace(AS)) { - // gfx90a, gfx940, gfx12 + // gfx90a, gfx942, gfx12 if (Subtarget->hasAtomicBufferGlobalPkAddF16Insts() && isV2F16(Ty)) return ReportUnsafeHWInst(AtomicExpansionKind::None); - // gfx940, gfx12 + // gfx942, gfx12 if (Subtarget->hasAtomicGlobalPkAddBF16Inst() && isV2BF16(Ty)) return ReportUnsafeHWInst(AtomicExpansionKind::None); } else if (AS == AMDGPUAS::BUFFER_FAT_POINTER) { - // gfx90a, gfx940, gfx12 + // gfx90a, gfx942, gfx12 if (Subtarget->hasAtomicBufferGlobalPkAddF16Insts() && isV2F16(Ty)) return ReportUnsafeHWInst(AtomicExpansionKind::None); - // While gfx90a/gfx940 supports v2bf16 for global/flat, it does not for + // While gfx90a/gfx942 supports v2bf16 for global/flat, it does not for // buffer. gfx12 does have the buffer version. if (Subtarget->hasAtomicBufferPkAddBF16Inst() && isV2BF16(Ty)) return ReportUnsafeHWInst(AtomicExpansionKind::None); } - // global and flat atomic fadd f64: gfx90a, gfx940. + // global and flat atomic fadd f64: gfx90a, gfx942. 
if (Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst() && Ty->isDoubleTy()) return ReportUnsafeHWInst(AtomicExpansionKind::None); if (AS != AMDGPUAS::FLAT_ADDRESS) { if (Ty->isFloatTy()) { - // global/buffer atomic fadd f32 no-rtn: gfx908, gfx90a, gfx940, + // global/buffer atomic fadd f32 no-rtn: gfx908, gfx90a, gfx942, // gfx11+. if (RMW->use_empty() && Subtarget->hasAtomicFaddNoRtnInsts()) return ReportUnsafeHWInst(AtomicExpansionKind::None); - // global/buffer atomic fadd f32 rtn: gfx90a, gfx940, gfx11+. + // global/buffer atomic fadd f32 rtn: gfx90a, gfx942, gfx11+. if (!RMW->use_empty() && Subtarget->hasAtomicFaddRtnInsts()) return ReportUnsafeHWInst(AtomicExpansionKind::None); } else { @@ -16867,7 +16867,7 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { } } - // flat atomic fadd f32: gfx940, gfx11+. + // flat atomic fadd f32: gfx942, gfx11+. if (AS == AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy()) { if (Subtarget->hasFlatAtomicFaddF32Inst()) return ReportUnsafeHWInst(AtomicExpansionKind::None); @@ -16906,7 +16906,7 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { // float, double restored in gfx10. // double removed again in gfx11, so only f32 for gfx11/gfx12. // - // For gfx9, gfx90a and gfx940 support f64 for global (same as fadd), but + // For gfx9, gfx90a and gfx942 support f64 for global (same as fadd), but // no f32. 
if (AS == AMDGPUAS::FLAT_ADDRESS) { if (Subtarget->hasAtomicFMinFMaxF32FlatInsts() && Ty->isFloatTy()) diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp index be6cff873532b..79fb36acc0ea7 100644 --- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -492,7 +492,6 @@ class SIGfx940CacheControl : public SIGfx90ACacheControl { } public: - SIGfx940CacheControl(const GCNSubtarget &ST) : SIGfx90ACacheControl(ST) {}; bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI, diff --git a/llvm/lib/Target/AMDGPU/SISchedule.td b/llvm/lib/Target/AMDGPU/SISchedule.td index 117add324db56..2a374b360b04a 100644 --- a/llvm/lib/Target/AMDGPU/SISchedule.td +++ b/llvm/lib/Target/AMDGPU/SISchedule.td @@ -94,7 +94,7 @@ class SISchedMachineModel : SchedMachineModel { def SIFullSpeedModel : SISchedMachineModel; def SIQuarterSpeedModel : SISchedMachineModel; def SIDPFullSpeedModel : SISchedMachineModel; -def SIDPGFX940FullSpeedModel : SISchedMachineModel; +def SIDPGFX942FullSpeedModel : SISchedMachineModel; def SIDPGFX950FullSpeedModel : SISchedMachineModel; def GFX10SpeedModel : SISchedMachineModel; def GFX11SpeedModel : SISchedMachineModel; @@ -276,7 +276,7 @@ def : InstRW<[Write8PassDGEMM, MIMFMARead], (instregex "^V_MFMA_.64_16X16X")>; } // End SchedModel = SIDPFullSpeedModel -let SchedModel = SIDPGFX940FullSpeedModel in { +let SchedModel = SIDPGFX942FullSpeedModel in { defm : SICommonWriteRes; @@ -308,7 +308,7 @@ def : InstRW<[Write8PassDGEMM, MIMFMARead], (instregex "^V_MFMA_.64_16X16X")>; def : InstRW<[Write4PassMAI, MIMFMARead], (instregex "^V_SMFMAC_.32_16X16X")>; def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_SMFMAC_.32_32X32X")>; -} // End SchedModel = SIDPGFX940FullSpeedModel +} // End SchedModel = SIDPGFX942FullSpeedModel let SchedModel = SIDPGFX950FullSpeedModel in { diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp 
b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp index a8e4ce133ffbc..e433b85489e6e 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp @@ -216,7 +216,7 @@ static constexpr CustomOperand Operands[] = { {{"HW_REG_SCRATCH_BASE_HI"}, ID_FLAT_SCR_HI, isGFX12Plus}, {{"HW_REG_SHADER_CYCLES_LO"}, ID_SHADER_CYCLES, isGFX12Plus}, - // GFX940 specific registers + // GFX942 specific registers {{"HW_REG_XCC_ID"}, ID_XCC_ID, isGFX940}, {{"HW_REG_SQ_PERF_SNAPSHOT_DATA"}, ID_SQ_PERF_SNAPSHOT_DATA, isGFX940}, {{"HW_REG_SQ_PERF_SNAPSHOT_DATA1"}, ID_SQ_PERF_SNAPSHOT_DATA1, isGFX940}, diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp index 0a605dfd017cb..8731a16b88a5c 100644 --- a/llvm/lib/TargetParser/TargetParser.cpp +++ b/llvm/lib/TargetParser/TargetParser.cpp @@ -104,8 +104,6 @@ constexpr GPUInfo AMDGCNGPUs[] = { {{"gfx909"}, {"gfx909"}, GK_GFX909, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, {{"gfx90a"}, {"gfx90a"}, GK_GFX90A, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC}, {{"gfx90c"}, {"gfx90c"}, GK_GFX90C, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, - {{"gfx940"}, {"gfx940"}, GK_GFX940, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC}, - {{"gfx941"}, {"gfx941"}, GK_GFX941, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC}, {{"gfx942"}, {"gfx942"}, GK_GFX942, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC}, {{"gfx950"}, {"gfx950"}, GK_GFX950, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC}, {{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP}, @@ -260,8 +258,6 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) { case GK_GFX909: return {9, 0, 9}; case GK_GFX90A: return {9, 0, 10}; case GK_GFX90C: return {9, 0, 
12}; - case GK_GFX940: return {9, 4, 0}; - case GK_GFX941: return {9, 4, 1}; case GK_GFX942: return {9, 4, 2}; case GK_GFX950: return {9, 5, 0}; case GK_GFX1010: return {10, 1, 0}; @@ -506,8 +502,6 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["gfx950-insts"] = true; [[fallthrough]]; case GK_GFX942: - case GK_GFX941: - case GK_GFX940: Features["fp8-insts"] = true; Features["fp8-conversion-insts"] = true; if (Kind != GK_GFX950) diff --git a/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-debug-info.ll b/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-debug-info.ll index b008f397318e8..89c9801b5e466 100644 --- a/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-debug-info.ll +++ b/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-debug-info.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -passes='amdgpu-attributor,function(amdgpu-lower-kernel-arguments)' -amdgpu-kernarg-preload-count=16 -S < %s 2>&1 \ +; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes='amdgpu-attributor,function(amdgpu-lower-kernel-arguments)' -amdgpu-kernarg-preload-count=16 -S < %s 2>&1 \ ; RUN: | FileCheck --match-full-lines --implicit-check-not='declare' %s ; Confirms we do not leave behind a declaration which references the same diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 2da08127f20a8..fdae09ac767e6 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -1624,8 +1624,6 @@ const EnumEntry<unsigned> ElfHeaderMipsFlags[] = { ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX909, "gfx909"), \ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90A, "gfx90a"), \ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90C, "gfx90c"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX941, "gfx941"), \ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX942, "gfx942"), \ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX950, "gfx950"), \ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1010, 
"gfx1010"), \ _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits