Author: Chinmay Deshpande Date: 2026-03-18T13:52:34-07:00 New Revision: e044c4ad81f09bba5d31463b2b63f0c27cfc1aca
URL: https://github.com/llvm/llvm-project/commit/e044c4ad81f09bba5d31463b2b63f0c27cfc1aca DIFF: https://github.com/llvm/llvm-project/commit/e044c4ad81f09bba5d31463b2b63f0c27cfc1aca.diff LOG: [AMDGPU] Add target features for SWMMAC instructions (#185785) Introduce `swmmac-gfx1200-insts` and `swmmac-gfx1250-insts` Added: clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-gfx1250-err.cl clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w32-gfx10-err.cl clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w64-gfx10-err.cl Modified: clang/include/clang/Basic/BuiltinsAMDGPU.td llvm/lib/Target/AMDGPU/AMDGPU.td llvm/lib/Target/AMDGPU/VOP3PInstructions.td llvm/lib/TargetParser/TargetParser.cpp Removed: ################################################################################ diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.td b/clang/include/clang/Basic/BuiltinsAMDGPU.td index acd0a34a79253..20e2af6aaf700 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.td +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.td @@ -828,29 +828,29 @@ def __builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12 : AMDGPUBuiltin<"_ExtVector let ArgNames = ["a_sign", "a", "b_sign", "b", "c", "clamp"]; } -def __builtin_amdgcn_swmmac_f32_16x16x32_f16_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, __fp16>, _ExtVector<16, __fp16>, _ExtVector<8, float>, int)", [Const], "wmma-128b-insts,wavefrontsize32">; -def __builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, short>, _ExtVector<16, short>, _ExtVector<8, float>, int)", [Const], "wmma-128b-insts,wavefrontsize32">; -def __builtin_amdgcn_swmmac_f16_16x16x32_f16_w32 : AMDGPUBuiltin<"_ExtVector<8, __fp16>(_ExtVector<8, __fp16>, _ExtVector<16, __fp16>, _ExtVector<8, __fp16>, int)", [Const], "wmma-128b-insts,wavefrontsize32">; -def __builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32 : AMDGPUBuiltin<"_ExtVector<8, short>(_ExtVector<8, short>, _ExtVector<16, short>, _ExtVector<8, short>, int)", [Const], 
"wmma-128b-insts,wavefrontsize32">; -def __builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32 : AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, _ExtVector<2, int>, _Constant bool, _ExtVector<4, int>, _ExtVector<8, int>, int, _Constant bool)", [Const], "wmma-128b-insts,wavefrontsize32">; -def __builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32 : AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, int, _Constant bool, _ExtVector<2, int>, _ExtVector<8, int>, int, _Constant bool)", [Const], "wmma-128b-insts,wavefrontsize32">; -def __builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32 : AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, _ExtVector<2, int>, _Constant bool, _ExtVector<4, int>, _ExtVector<8, int>, int, _Constant bool)", [Const], "wmma-128b-insts,wavefrontsize32">; -def __builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<4, int>, _ExtVector<8, float>, int)", [Const], "wmma-128b-insts,wavefrontsize32">; -def __builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<4, int>, _ExtVector<8, float>, int)", [Const], "wmma-128b-insts,wavefrontsize32">; -def __builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<4, int>, _ExtVector<8, float>, int)", [Const], "wmma-128b-insts,wavefrontsize32">; -def __builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<4, int>, _ExtVector<8, float>, int)", [Const], "wmma-128b-insts,wavefrontsize32">; - -def __builtin_amdgcn_swmmac_f32_16x16x32_f16_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(_ExtVector<4, __fp16>, _ExtVector<8, __fp16>, _ExtVector<4, float>, int)", [Const], "wmma-128b-insts,wavefrontsize64">; -def __builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(_ExtVector<4, short>, _ExtVector<8, short>, _ExtVector<4, float>, int)", [Const], "wmma-128b-insts,wavefrontsize64">; 
-def __builtin_amdgcn_swmmac_f16_16x16x32_f16_w64 : AMDGPUBuiltin<"_ExtVector<4, __fp16>(_ExtVector<4, __fp16>, _ExtVector<8, __fp16>, _ExtVector<4, __fp16>, int)", [Const], "wmma-128b-insts,wavefrontsize64">; -def __builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64 : AMDGPUBuiltin<"_ExtVector<4, short>(_ExtVector<4, short>, _ExtVector<8, short>, _ExtVector<4, short>, int)", [Const], "wmma-128b-insts,wavefrontsize64">; -def __builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64 : AMDGPUBuiltin<"_ExtVector<4, int>(_Constant bool, int, _Constant bool, _ExtVector<2, int>, _ExtVector<4, int>, int, _Constant bool)", [Const], "wmma-128b-insts,wavefrontsize64">; -def __builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64 : AMDGPUBuiltin<"_ExtVector<4, int>(_Constant bool, int, _Constant bool, int, _ExtVector<4, int>, int, _Constant bool)", [Const], "wmma-128b-insts,wavefrontsize64">; -def __builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64 : AMDGPUBuiltin<"_ExtVector<4, int>(_Constant bool, int, _Constant bool, _ExtVector<2, int>, _ExtVector<4, int>, int, _Constant bool)", [Const], "wmma-128b-insts,wavefrontsize64">; -def __builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(int, _ExtVector<2, int>, _ExtVector<4, float>, int)", [Const], "wmma-128b-insts,wavefrontsize64">; -def __builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(int, _ExtVector<2, int>, _ExtVector<4, float>, int)", [Const], "wmma-128b-insts,wavefrontsize64">; -def __builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(int, _ExtVector<2, int>, _ExtVector<4, float>, int)", [Const], "wmma-128b-insts,wavefrontsize64">; -def __builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(int, _ExtVector<2, int>, _ExtVector<4, float>, int)", [Const], "wmma-128b-insts,wavefrontsize64">; +def __builtin_amdgcn_swmmac_f32_16x16x32_f16_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, __fp16>, _ExtVector<16, __fp16>, 
_ExtVector<8, float>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize32">; +def __builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, short>, _ExtVector<16, short>, _ExtVector<8, float>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize32">; +def __builtin_amdgcn_swmmac_f16_16x16x32_f16_w32 : AMDGPUBuiltin<"_ExtVector<8, __fp16>(_ExtVector<8, __fp16>, _ExtVector<16, __fp16>, _ExtVector<8, __fp16>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize32">; +def __builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32 : AMDGPUBuiltin<"_ExtVector<8, short>(_ExtVector<8, short>, _ExtVector<16, short>, _ExtVector<8, short>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize32">; +def __builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32 : AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, _ExtVector<2, int>, _Constant bool, _ExtVector<4, int>, _ExtVector<8, int>, int, _Constant bool)", [Const], "swmmac-gfx1200-insts,wavefrontsize32">; +def __builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32 : AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, int, _Constant bool, _ExtVector<2, int>, _ExtVector<8, int>, int, _Constant bool)", [Const], "swmmac-gfx1200-insts,wavefrontsize32">; +def __builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32 : AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, _ExtVector<2, int>, _Constant bool, _ExtVector<4, int>, _ExtVector<8, int>, int, _Constant bool)", [Const], "swmmac-gfx1200-insts,wavefrontsize32">; +def __builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<4, int>, _ExtVector<8, float>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize32">; +def __builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<4, int>, _ExtVector<8, float>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize32">; +def __builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, 
_ExtVector<4, int>, _ExtVector<8, float>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize32">; +def __builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<4, int>, _ExtVector<8, float>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize32">; + +def __builtin_amdgcn_swmmac_f32_16x16x32_f16_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(_ExtVector<4, __fp16>, _ExtVector<8, __fp16>, _ExtVector<4, float>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize64">; +def __builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(_ExtVector<4, short>, _ExtVector<8, short>, _ExtVector<4, float>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize64">; +def __builtin_amdgcn_swmmac_f16_16x16x32_f16_w64 : AMDGPUBuiltin<"_ExtVector<4, __fp16>(_ExtVector<4, __fp16>, _ExtVector<8, __fp16>, _ExtVector<4, __fp16>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize64">; +def __builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64 : AMDGPUBuiltin<"_ExtVector<4, short>(_ExtVector<4, short>, _ExtVector<8, short>, _ExtVector<4, short>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize64">; +def __builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64 : AMDGPUBuiltin<"_ExtVector<4, int>(_Constant bool, int, _Constant bool, _ExtVector<2, int>, _ExtVector<4, int>, int, _Constant bool)", [Const], "swmmac-gfx1200-insts,wavefrontsize64">; +def __builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64 : AMDGPUBuiltin<"_ExtVector<4, int>(_Constant bool, int, _Constant bool, int, _ExtVector<4, int>, int, _Constant bool)", [Const], "swmmac-gfx1200-insts,wavefrontsize64">; +def __builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64 : AMDGPUBuiltin<"_ExtVector<4, int>(_Constant bool, int, _Constant bool, _ExtVector<2, int>, _ExtVector<4, int>, int, _Constant bool)", [Const], "swmmac-gfx1200-insts,wavefrontsize64">; +def __builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(int, _ExtVector<2, int>, _ExtVector<4, float>, 
int)", [Const], "swmmac-gfx1200-insts,wavefrontsize64">; +def __builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(int, _ExtVector<2, int>, _ExtVector<4, float>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize64">; +def __builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(int, _ExtVector<2, int>, _ExtVector<4, float>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize64">; +def __builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(int, _ExtVector<2, int>, _ExtVector<4, float>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize64">; def __builtin_amdgcn_prng_b32 : AMDGPUBuiltin<"unsigned int(unsigned int)", [Const], "prng-inst">; def __builtin_amdgcn_cvt_scalef32_pk32_fp6_f16 : AMDGPUBuiltin<"_ExtVector<6, unsigned int>(_ExtVector<32, _Float16>, float)", [Const], "f16bf16-to-fp6bf6-cvt-scale-insts">; @@ -1170,20 +1170,20 @@ def __builtin_amdgcn_wmma_scale16_f32_32x16x128_f4 : AMDGPUBuiltin<"_ExtVector<1 let Documentation = [DocWMMA_scale16_GFX1250]; let ArgNames = ["a", "b", "c_mod", "c", "matrix_a_scale", "matrix_a_scale_fmt", "matrix_a_scale_exp", "matrix_b_scale", "matrix_b_scale_fmt", "matrix_b_scale_exp", "matrix_a_reuse", "matrix_b_reuse"]; } -def __builtin_amdgcn_swmmac_f32_16x16x64_bf16 : AMDGPUBuiltin<"_ExtVector<8, float>(_Constant bool, _ExtVector<16, __bf16>, _Constant bool, _ExtVector<32, __bf16>, _ExtVector<8, float>, int, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">; -def __builtin_amdgcn_swmmac_bf16_16x16x64_bf16 : AMDGPUBuiltin<"_ExtVector<8, __bf16>(_Constant bool, _ExtVector<16, __bf16>, _Constant bool, _ExtVector<32, __bf16>, _ExtVector<8, __bf16>, int, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">; -def __builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16 : AMDGPUBuiltin<"_ExtVector<8, float>(_Constant bool, _ExtVector<16, __bf16>, _Constant bool, _ExtVector<32, __bf16>, _ExtVector<8, 
float>, int, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">; -def __builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, int>, _ExtVector<16, int>, _ExtVector<8, float>, _ExtVector<2, int>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">; -def __builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, int>, _ExtVector<16, int>, _ExtVector<8, float>, _ExtVector<2, int>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">; -def __builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, int>, _ExtVector<16, int>, _ExtVector<8, float>, _ExtVector<2, int>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">; -def __builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, int>, _ExtVector<16, int>, _ExtVector<8, float>, _ExtVector<2, int>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">; -def __builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8 : AMDGPUBuiltin<"_ExtVector<8, _Float16>(_ExtVector<8, int>, _ExtVector<16, int>, _ExtVector<8, _Float16>, _ExtVector<2, int>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">; -def __builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8 : AMDGPUBuiltin<"_ExtVector<8, _Float16>(_ExtVector<8, int>, _ExtVector<16, int>, _ExtVector<8, _Float16>, _ExtVector<2, int>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">; -def __builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8 : AMDGPUBuiltin<"_ExtVector<8, _Float16>(_ExtVector<8, int>, _ExtVector<16, int>, _ExtVector<8, _Float16>, _ExtVector<2, int>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">; -def __builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8 : AMDGPUBuiltin<"_ExtVector<8, _Float16>(_ExtVector<8, int>, _ExtVector<16, int>, _ExtVector<8, 
_Float16>, _ExtVector<2, int>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">; -def __builtin_amdgcn_swmmac_i32_16x16x128_iu8 : AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, _ExtVector<8, int>, _Constant bool, _ExtVector<16, int>, _ExtVector<8, int>, _ExtVector<2, int>, _Constant bool, _Constant bool, ...)", [Const], "gfx1250-insts,wavefrontsize32">; -def __builtin_amdgcn_swmmac_f32_16x16x64_f16 : AMDGPUBuiltin<"_ExtVector<8, float>(_Constant bool, _ExtVector<16, _Float16>, _Constant bool, _ExtVector<32, _Float16>, _ExtVector<8, float>, int, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">; -def __builtin_amdgcn_swmmac_f16_16x16x64_f16 : AMDGPUBuiltin<"_ExtVector<8, _Float16>(_Constant bool, _ExtVector<16, _Float16>, _Constant bool, _ExtVector<32, _Float16>, _ExtVector<8, _Float16>, int, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">; +def __builtin_amdgcn_swmmac_f32_16x16x64_bf16 : AMDGPUBuiltin<"_ExtVector<8, float>(_Constant bool, _ExtVector<16, __bf16>, _Constant bool, _ExtVector<32, __bf16>, _ExtVector<8, float>, int, _Constant bool, _Constant bool)", [Const], "swmmac-gfx1250-insts,wavefrontsize32">; +def __builtin_amdgcn_swmmac_bf16_16x16x64_bf16 : AMDGPUBuiltin<"_ExtVector<8, __bf16>(_Constant bool, _ExtVector<16, __bf16>, _Constant bool, _ExtVector<32, __bf16>, _ExtVector<8, __bf16>, int, _Constant bool, _Constant bool)", [Const], "swmmac-gfx1250-insts,wavefrontsize32">; +def __builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16 : AMDGPUBuiltin<"_ExtVector<8, float>(_Constant bool, _ExtVector<16, __bf16>, _Constant bool, _ExtVector<32, __bf16>, _ExtVector<8, float>, int, _Constant bool, _Constant bool)", [Const], "swmmac-gfx1250-insts,wavefrontsize32">; +def __builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, int>, _ExtVector<16, int>, _ExtVector<8, float>, _ExtVector<2, int>, _Constant bool, _Constant bool)", [Const], 
"swmmac-gfx1250-insts,wavefrontsize32">; +def __builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, int>, _ExtVector<16, int>, _ExtVector<8, float>, _ExtVector<2, int>, _Constant bool, _Constant bool)", [Const], "swmmac-gfx1250-insts,wavefrontsize32">; +def __builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, int>, _ExtVector<16, int>, _ExtVector<8, float>, _ExtVector<2, int>, _Constant bool, _Constant bool)", [Const], "swmmac-gfx1250-insts,wavefrontsize32">; +def __builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, int>, _ExtVector<16, int>, _ExtVector<8, float>, _ExtVector<2, int>, _Constant bool, _Constant bool)", [Const], "swmmac-gfx1250-insts,wavefrontsize32">; +def __builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8 : AMDGPUBuiltin<"_ExtVector<8, _Float16>(_ExtVector<8, int>, _ExtVector<16, int>, _ExtVector<8, _Float16>, _ExtVector<2, int>, _Constant bool, _Constant bool)", [Const], "swmmac-gfx1250-insts,wavefrontsize32">; +def __builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8 : AMDGPUBuiltin<"_ExtVector<8, _Float16>(_ExtVector<8, int>, _ExtVector<16, int>, _ExtVector<8, _Float16>, _ExtVector<2, int>, _Constant bool, _Constant bool)", [Const], "swmmac-gfx1250-insts,wavefrontsize32">; +def __builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8 : AMDGPUBuiltin<"_ExtVector<8, _Float16>(_ExtVector<8, int>, _ExtVector<16, int>, _ExtVector<8, _Float16>, _ExtVector<2, int>, _Constant bool, _Constant bool)", [Const], "swmmac-gfx1250-insts,wavefrontsize32">; +def __builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8 : AMDGPUBuiltin<"_ExtVector<8, _Float16>(_ExtVector<8, int>, _ExtVector<16, int>, _ExtVector<8, _Float16>, _ExtVector<2, int>, _Constant bool, _Constant bool)", [Const], "swmmac-gfx1250-insts,wavefrontsize32">; +def __builtin_amdgcn_swmmac_i32_16x16x128_iu8 : AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, _ExtVector<8, int>, _Constant bool, 
_ExtVector<16, int>, _ExtVector<8, int>, _ExtVector<2, int>, _Constant bool, _Constant bool, ...)", [Const], "swmmac-gfx1250-insts,wavefrontsize32">; +def __builtin_amdgcn_swmmac_f32_16x16x64_f16 : AMDGPUBuiltin<"_ExtVector<8, float>(_Constant bool, _ExtVector<16, _Float16>, _Constant bool, _ExtVector<32, _Float16>, _ExtVector<8, float>, int, _Constant bool, _Constant bool)", [Const], "swmmac-gfx1250-insts,wavefrontsize32">; +def __builtin_amdgcn_swmmac_f16_16x16x64_f16 : AMDGPUBuiltin<"_ExtVector<8, _Float16>(_Constant bool, _ExtVector<16, _Float16>, _Constant bool, _ExtVector<32, _Float16>, _ExtVector<8, _Float16>, int, _Constant bool, _Constant bool)", [Const], "swmmac-gfx1250-insts,wavefrontsize32">; // GFX12.5 128B cooperative atomics def __builtin_amdgcn_cooperative_atomic_load_32x4B : AMDGPUBuiltin<"int(int *, _Constant int, char const *)", [Const], "gfx1250-insts,wavefrontsize32">; diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-gfx1250-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-gfx1250-err.cl new file mode 100644 index 0000000000000..641a43426efb4 --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-gfx1250-err.cl @@ -0,0 +1,36 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1030 \ +// RUN: -verify -S -o - %s + +typedef float v8f __attribute__((ext_vector_type(8))); +typedef half v8h __attribute__((ext_vector_type(8))); +typedef half v16h __attribute__((ext_vector_type(16))); +typedef half v32h __attribute__((ext_vector_type(32))); +typedef __bf16 v8bf16 __attribute__((ext_vector_type(8))); +typedef __bf16 v16bf16 __attribute__((ext_vector_type(16))); +typedef __bf16 v32bf16 __attribute__((ext_vector_type(32))); +typedef int v2i __attribute__((ext_vector_type(2))); +typedef int v8i __attribute__((ext_vector_type(8))); +typedef int v16i __attribute__((ext_vector_type(16))); + +void test_amdgcn_swmmac_gfx1250(global v8f* out8f, global v8h* out8h, global 
v8bf16* out8bf16, global v8i* out8i, + v16bf16 a16bf16, v16h a16h, v8i a8i, + v32bf16 b32bf16, v32h b32h, v16i b16i, + v8f c8f, v8bf16 c8bf16, v8h c8h, v8i c8i, + int index, v2i index2) +{ + *out8f = __builtin_amdgcn_swmmac_f32_16x16x64_bf16(0, a16bf16, 0, b32bf16, c8f, index, false, true); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x64_bf16' needs target feature swmmac-gfx1250-insts,wavefrontsize32}} + *out8bf16 = __builtin_amdgcn_swmmac_bf16_16x16x64_bf16(0, a16bf16, 0, b32bf16, c8bf16, index, false, true); // expected-error{{'__builtin_amdgcn_swmmac_bf16_16x16x64_bf16' needs target feature swmmac-gfx1250-insts,wavefrontsize32}} + *out8f = __builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16(0, a16bf16, 0, b32bf16, c8f, index, false, true); // expected-error{{'__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16' needs target feature swmmac-gfx1250-insts,wavefrontsize32}} + *out8f = __builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8(a8i, b16i, c8f, index2, false, true); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8' needs target feature swmmac-gfx1250-insts,wavefrontsize32}} + *out8f = __builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8(a8i, b16i, c8f, index2, false, true); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8' needs target feature swmmac-gfx1250-insts,wavefrontsize32}} + *out8f = __builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8(a8i, b16i, c8f, index2, false, true); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8' needs target feature swmmac-gfx1250-insts,wavefrontsize32}} + *out8f = __builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8(a8i, b16i, c8f, index2, false, true); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8' needs target feature swmmac-gfx1250-insts,wavefrontsize32}} + *out8h = __builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8(a8i, b16i, c8h, index2, false, true); // expected-error{{'__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8' needs target feature swmmac-gfx1250-insts,wavefrontsize32}} + 
*out8h = __builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8(a8i, b16i, c8h, index2, false, true); // expected-error{{'__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8' needs target feature swmmac-gfx1250-insts,wavefrontsize32}} + *out8h = __builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8(a8i, b16i, c8h, index2, false, true); // expected-error{{'__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8' needs target feature swmmac-gfx1250-insts,wavefrontsize32}} + *out8h = __builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8(a8i, b16i, c8h, index2, false, true); // expected-error{{'__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8' needs target feature swmmac-gfx1250-insts,wavefrontsize32}} + *out8i = __builtin_amdgcn_swmmac_i32_16x16x128_iu8(0, a8i, 0, b16i, c8i, index2, false, true); // expected-error{{'__builtin_amdgcn_swmmac_i32_16x16x128_iu8' needs target feature swmmac-gfx1250-insts,wavefrontsize32}} + *out8f = __builtin_amdgcn_swmmac_f32_16x16x64_f16(0, a16h, 0, b32h, c8f, index, false, true); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x64_f16' needs target feature swmmac-gfx1250-insts,wavefrontsize32}} + *out8h = __builtin_amdgcn_swmmac_f16_16x16x64_f16(0, a16h, 0, b32h, c8h, index, false, true); // expected-error{{'__builtin_amdgcn_swmmac_f16_16x16x64_f16' needs target feature swmmac-gfx1250-insts,wavefrontsize32}} +} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w32-gfx10-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w32-gfx10-err.cl new file mode 100644 index 0000000000000..fa4f3b5e40233 --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w32-gfx10-err.cl @@ -0,0 +1,31 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1030 \ +// RUN: -verify -S -o - %s + +typedef int v2i __attribute__((ext_vector_type(2))); +typedef int v4i __attribute__((ext_vector_type(4))); +typedef float v8f __attribute__((ext_vector_type(8))); +typedef half v8h __attribute__((ext_vector_type(8))); +typedef 
short v8s __attribute__((ext_vector_type(8))); +typedef int v8i __attribute__((ext_vector_type(8))); +typedef half v16h __attribute__((ext_vector_type(16))); +typedef short v16s __attribute__((ext_vector_type(16))); + +void test_amdgcn_swmmac_w32(global v8f* out8f, global v8h* out8h, global v8s* out8s, global v8i* out8i, + v8h a8h, v8s a8s, v2i a2i, int ai, + v16h b16h, v16s b16s, v4i b4i, v2i b2i, + v8f c8f, v8h c8h, v8s c8s, v8i c8i, + int index) +{ + *out8f = __builtin_amdgcn_swmmac_f32_16x16x32_f16_w32(a8h, b16h, c8f, index); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32' needs target feature swmmac-gfx1200-insts,wavefrontsize32}} + *out8f = __builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32(a8s, b16s, c8f, index); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32' needs target feature swmmac-gfx1200-insts,wavefrontsize32}} + *out8h = __builtin_amdgcn_swmmac_f16_16x16x32_f16_w32(a8h, b16h, c8h, index); // expected-error{{'__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32' needs target feature swmmac-gfx1200-insts,wavefrontsize32}} + *out8s = __builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32(a8s, b16s, c8s, index); // expected-error{{'__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32' needs target feature swmmac-gfx1200-insts,wavefrontsize32}} + *out8i = __builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32(true, a2i, true, b4i, c8i, index, true); // expected-error{{'__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32' needs target feature swmmac-gfx1200-insts,wavefrontsize32}} + *out8i = __builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32(true, ai, true, b2i, c8i, index, true); // expected-error{{'__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32' needs target feature swmmac-gfx1200-insts,wavefrontsize32}} + *out8i = __builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32(true, a2i, true, b4i, c8i, index, true); // expected-error{{'__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32' needs target feature swmmac-gfx1200-insts,wavefrontsize32}} + *out8f = 
__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32(a2i, b4i, c8f, index); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32' needs target feature swmmac-gfx1200-insts,wavefrontsize32}} + *out8f = __builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32(a2i, b4i, c8f, index); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32' needs target feature swmmac-gfx1200-insts,wavefrontsize32}} + *out8f = __builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32(a2i, b4i, c8f, index); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32' needs target feature swmmac-gfx1200-insts,wavefrontsize32}} + *out8f = __builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32(a2i, b4i, c8f, index); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32' needs target feature swmmac-gfx1200-insts,wavefrontsize32}} +} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w64-gfx10-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w64-gfx10-err.cl new file mode 100644 index 0000000000000..4cde9d78abf2f --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w64-gfx10-err.cl @@ -0,0 +1,30 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1030 \ +// RUN: -verify -S -o - %s + +typedef int v2i __attribute__((ext_vector_type(2))); +typedef float v4f __attribute__((ext_vector_type(4))); +typedef half v4h __attribute__((ext_vector_type(4))); +typedef short v4s __attribute__((ext_vector_type(4))); +typedef int v4i __attribute__((ext_vector_type(4))); +typedef half v8h __attribute__((ext_vector_type(8))); +typedef short v8s __attribute__((ext_vector_type(8))); + +void test_amdgcn_swmmac_w64(global v4f* out4f, global v4h* out4h, global v4s* out4s, global v4i* out4i, + v4h a4h, v4s a4s, int ai, + v8h b8h, v8s b8s, v2i b2i, int bi, + v4f c4f, v4h c4h, v4s c4s, v4i c4i, + int index) +{ + *out4f = __builtin_amdgcn_swmmac_f32_16x16x32_f16_w64(a4h, b8h, c4f, index); // 
expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64' needs target feature swmmac-gfx1200-insts,wavefrontsize64}} + *out4f = __builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64(a4s, b8s, c4f, index); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64' needs target feature swmmac-gfx1200-insts,wavefrontsize64}} + *out4h = __builtin_amdgcn_swmmac_f16_16x16x32_f16_w64(a4h, b8h, c4h, index); // expected-error{{'__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64' needs target feature swmmac-gfx1200-insts,wavefrontsize64}} + *out4s = __builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64(a4s, b8s, c4s, index); // expected-error{{'__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64' needs target feature swmmac-gfx1200-insts,wavefrontsize64}} + *out4i = __builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64(true, ai, true, b2i, c4i, index, true); // expected-error{{'__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64' needs target feature swmmac-gfx1200-insts,wavefrontsize64}} + *out4i = __builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64(true, ai, true, bi, c4i, index, true); // expected-error{{'__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64' needs target feature swmmac-gfx1200-insts,wavefrontsize64}} + *out4i = __builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64(true, ai, true, b2i, c4i, index, true); // expected-error{{'__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64' needs target feature swmmac-gfx1200-insts,wavefrontsize64}} + *out4f = __builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64(ai, b2i, c4f, index); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64' needs target feature swmmac-gfx1200-insts,wavefrontsize64}} + *out4f = __builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64(ai, b2i, c4f, index); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64' needs target feature swmmac-gfx1200-insts,wavefrontsize64}} + *out4f = __builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64(ai, b2i, c4f, index); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64' needs target 
feature swmmac-gfx1200-insts,wavefrontsize64}} + *out4f = __builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64(ai, b2i, c4f, index); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64' needs target feature swmmac-gfx1200-insts,wavefrontsize64}} +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 616effeb5b9f2..d87e612cedd54 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -814,6 +814,14 @@ defm WMMA128bInsts : AMDGPUSubtargetFeature<"wmma-128b-insts", "Has WMMA instructions where A and B matrices do not have duplicated data" >; +defm SWMMACGfx1200Insts : AMDGPUSubtargetFeature<"swmmac-gfx1200-insts", + "Has GFX1200 SWMMAC instructions" +>; + +defm SWMMACGfx1250Insts : AMDGPUSubtargetFeature<"swmmac-gfx1250-insts", + "Has GFX1250 SWMMAC instructions" +>; + defm PkFmacF16Inst : AMDGPUSubtargetFeature<"pk-fmac-f16-inst", "Has v_pk_fmac_f16 instruction" >; @@ -1950,6 +1958,7 @@ def FeatureISAVersion11_7_0 : FeatureSet< FeatureFP8ConversionInsts, FeatureDot11Insts, FeatureWMMA128bInsts, + FeatureSWMMACGfx1200Insts, FeatureIEEEMinimumMaximumInsts, FeatureMinimum3Maximum3F32, FeatureMinimum3Maximum3F16])>; @@ -1983,6 +1992,7 @@ def FeatureISAVersion12 : FeatureSet< FeatureExtendedImageInsts, FeatureFP8ConversionInsts, FeatureWMMA128bInsts, + FeatureSWMMACGfx1200Insts, FeatureIEEEMinimumMaximumInsts, FeaturePackedTID, FeatureVcmpxPermlaneHazard, @@ -2080,7 +2090,9 @@ def FeatureISAVersion12_50_Common : FeatureSet< FeatureXNACK, FeatureClusters, FeatureD16Writes32BitVgpr, - FeatureMcastLoadInsts + FeatureMcastLoadInsts, + FeatureSWMMACGfx1200Insts, + FeatureSWMMACGfx1250Insts ]>; def FeatureISAVersion12_50 : FeatureSet< diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index 333240e0f7ac2..992c375069e77 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -2061,7 +2061,9 @@ 
let WaveSizePredicate = isWave32, SubtargetPredicate = isGFX11PlusNot12_50, Othe defm : WMMAPat<"V_WMMA_F32_16X16X16_BF8_FP8_w32", int_amdgcn_wmma_f32_16x16x16_bf8_fp8, F32_FP8BF8_WMMA_w32>; defm : WMMAPat<"V_WMMA_F32_16X16X16_BF8_BF8_w32", int_amdgcn_wmma_f32_16x16x16_bf8_bf8, F32_FP8BF8_WMMA_w32>; defm : WMMAPat<"V_WMMA_I32_16X16X32_IU4_w32", int_amdgcn_wmma_i32_16x16x32_iu4, I32_IU4X32_WMMA_w32>; +} +let WaveSizePredicate = isWave32, SubtargetPredicate = HasSWMMACGfx1200Insts in { def : SWMMACPat<V_SWMMAC_F32_16X16X32_F16_w32_twoaddr, int_amdgcn_swmmac_f32_16x16x32_f16, F32_F16_SWMMAC_w32>; def : SWMMACPat<V_SWMMAC_F32_16X16X32_BF16_w32_twoaddr, int_amdgcn_swmmac_f32_16x16x32_bf16, F32_BF16_SWMMAC_w32>; def : SWMMACPat<V_SWMMAC_F16_16X16X32_F16_w32_twoaddr, int_amdgcn_swmmac_f16_16x16x32_f16, F16_F16_SWMMAC_w32>; @@ -2088,7 +2090,9 @@ let WaveSizePredicate = isWave64, SubtargetPredicate = isGFX11PlusNot12_50, Othe defm : WMMAPat<"V_WMMA_F32_16X16X16_BF8_FP8_w64", int_amdgcn_wmma_f32_16x16x16_bf8_fp8, F32_FP8BF8_WMMA_w64>; defm : WMMAPat<"V_WMMA_F32_16X16X16_BF8_BF8_w64", int_amdgcn_wmma_f32_16x16x16_bf8_bf8, F32_FP8BF8_WMMA_w64>; defm : WMMAPat<"V_WMMA_I32_16X16X32_IU4_w64", int_amdgcn_wmma_i32_16x16x32_iu4, I32_IU4X32_WMMA_w64>; +} +let WaveSizePredicate = isWave64, SubtargetPredicate = HasSWMMACGfx1200Insts in { def : SWMMACPat<V_SWMMAC_F32_16X16X32_F16_w64_twoaddr, int_amdgcn_swmmac_f32_16x16x32_f16, F32_F16_SWMMAC_w64>; def : SWMMACPat<V_SWMMAC_F32_16X16X32_BF16_w64_twoaddr, int_amdgcn_swmmac_f32_16x16x32_bf16, F32_BF16_SWMMAC_w64>; def : SWMMACPat<V_SWMMAC_F16_16X16X32_F16_w64_twoaddr, int_amdgcn_swmmac_f16_16x16x32_f16, F16_F16_SWMMAC_w64>; diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp index d335f9174b150..3664711d387bc 100644 --- a/llvm/lib/TargetParser/TargetParser.cpp +++ b/llvm/lib/TargetParser/TargetParser.cpp @@ -249,10 +249,13 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, 
StringMap<bool> &Features) { AMDGPU::GPUKind Kind = parseArchAMDGCN(GPU); switch (Kind) { - case GK_GFX1310: case GK_GFX1251: case GK_GFX1250: case GK_GFX12_5_GENERIC: + Features["swmmac-gfx1200-insts"] = true; + Features["swmmac-gfx1250-insts"] = true; + [[fallthrough]]; + case GK_GFX1310: Features["ci-insts"] = true; Features["dot7-insts"] = true; Features["dot8-insts"] = true; @@ -333,6 +336,7 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, Features["cvt-pknorm-vop2-insts"] = true; Features["fp8-conversion-insts"] = true; Features["wmma-128b-insts"] = true; + Features["swmmac-gfx1200-insts"] = true; Features["atomic-fmin-fmax-global-f32"] = true; break; case GK_GFX1170: @@ -361,6 +365,7 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, Features["dot11-insts"] = true; Features["fp8-conversion-insts"] = true; Features["wmma-128b-insts"] = true; + Features["swmmac-gfx1200-insts"] = true; Features["atomic-fmin-fmax-global-f32"] = true; break; case GK_GFX1153: _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
