Author: Alex Voicu Date: 2026-04-11T11:12:48+01:00 New Revision: 870f8d9edee7bf550fe12d3d6e25d209bb9b8608
URL: https://github.com/llvm/llvm-project/commit/870f8d9edee7bf550fe12d3d6e25d209bb9b8608 DIFF: https://github.com/llvm/llvm-project/commit/870f8d9edee7bf550fe12d3d6e25d209bb9b8608.diff LOG: [NFC][AMDGPU] Fix `gfx90a`+ MFMA builtins (#191537) `gfx90a` added a set of MFMA instructions that are not available on prior GFXIPs. The Clang builtins for these were requiring the `mai-insts` feature, which is incorrect: `gfx908` supports `mai-insts` but does not support the added MFMAs. This led to opaque bugs where we'd check with `__has_builtin` for the availability of the builtin while targeting `gfx908`, be told it was available, and then get an ISel failure. Added: clang/test/CodeGenOpenCL/builtins-amdgcn-mfma-gfx908-err.cl Modified: clang/include/clang/Basic/BuiltinsAMDGPU.td clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-err.cl Removed: ################################################################################ diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.td b/clang/include/clang/Basic/BuiltinsAMDGPU.td index 17654daf6a469..db0fbb4b048e2 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.td +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.td @@ -603,13 +603,13 @@ def __builtin_amdgcn_mfma_f32_4x4x2bf16 : AMDGPUBuiltin<"_ExtVector<4, float>(_E def __builtin_amdgcn_mfma_f32_32x32x4bf16 : AMDGPUBuiltin<"_ExtVector<16, float>(_ExtVector<2, short>, _ExtVector<2, short>, _ExtVector<16, float>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">; def __builtin_amdgcn_mfma_f32_16x16x8bf16 : AMDGPUBuiltin<"_ExtVector<4, float>(_ExtVector<2, short>, _ExtVector<2, short>, _ExtVector<4, float>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">; -def __builtin_amdgcn_mfma_f32_32x32x4bf16_1k : AMDGPUBuiltin<"_ExtVector<32, float>(_ExtVector<4, short>, _ExtVector<4, short>, _ExtVector<32, float>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">; -def __builtin_amdgcn_mfma_f32_16x16x4bf16_1k : AMDGPUBuiltin<"_ExtVector<16, float>(_ExtVector<4, short>, 
_ExtVector<4, short>, _ExtVector<16, float>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">; -def __builtin_amdgcn_mfma_f32_4x4x4bf16_1k : AMDGPUBuiltin<"_ExtVector<4, float>(_ExtVector<4, short>, _ExtVector<4, short>, _ExtVector<4, float>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">; -def __builtin_amdgcn_mfma_f32_32x32x8bf16_1k : AMDGPUBuiltin<"_ExtVector<16, float>(_ExtVector<4, short>, _ExtVector<4, short>, _ExtVector<16, float>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">; -def __builtin_amdgcn_mfma_f32_16x16x16bf16_1k : AMDGPUBuiltin<"_ExtVector<4, float>(_ExtVector<4, short>, _ExtVector<4, short>, _ExtVector<4, float>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">; -def __builtin_amdgcn_mfma_f64_16x16x4f64 : AMDGPUBuiltin<"_ExtVector<4, double>(double, double, _ExtVector<4, double>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">; -def __builtin_amdgcn_mfma_f64_4x4x4f64 : AMDGPUBuiltin<"double(double, double, double, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">; +def __builtin_amdgcn_mfma_f32_32x32x4bf16_1k : AMDGPUBuiltin<"_ExtVector<32, float>(_ExtVector<4, short>, _ExtVector<4, short>, _ExtVector<32, float>, _Constant int, _Constant int, _Constant int)", [Const], "gfx90a-insts">; +def __builtin_amdgcn_mfma_f32_16x16x4bf16_1k : AMDGPUBuiltin<"_ExtVector<16, float>(_ExtVector<4, short>, _ExtVector<4, short>, _ExtVector<16, float>, _Constant int, _Constant int, _Constant int)", [Const], "gfx90a-insts">; +def __builtin_amdgcn_mfma_f32_4x4x4bf16_1k : AMDGPUBuiltin<"_ExtVector<4, float>(_ExtVector<4, short>, _ExtVector<4, short>, _ExtVector<4, float>, _Constant int, _Constant int, _Constant int)", [Const], "gfx90a-insts">; +def __builtin_amdgcn_mfma_f32_32x32x8bf16_1k : AMDGPUBuiltin<"_ExtVector<16, float>(_ExtVector<4, short>, _ExtVector<4, short>, _ExtVector<16, float>, _Constant int, _Constant int, _Constant int)", 
[Const], "gfx90a-insts">; +def __builtin_amdgcn_mfma_f32_16x16x16bf16_1k : AMDGPUBuiltin<"_ExtVector<4, float>(_ExtVector<4, short>, _ExtVector<4, short>, _ExtVector<4, float>, _Constant int, _Constant int, _Constant int)", [Const], "gfx90a-insts">; +def __builtin_amdgcn_mfma_f64_16x16x4f64 : AMDGPUBuiltin<"_ExtVector<4, double>(double, double, _ExtVector<4, double>, _Constant int, _Constant int, _Constant int)", [Const], "gfx90a-insts">; +def __builtin_amdgcn_mfma_f64_4x4x4f64 : AMDGPUBuiltin<"double(double, double, double, _Constant int, _Constant int, _Constant int)", [Const], "gfx90a-insts">; def __builtin_amdgcn_mfma_i32_16x16x32_i8 : AMDGPUBuiltin<"_ExtVector<4, int>(int64_t, int64_t, _ExtVector<4, int>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">; def __builtin_amdgcn_mfma_i32_32x32x16_i8 : AMDGPUBuiltin<"_ExtVector<16, int>(int64_t, int64_t, _ExtVector<16, int>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">; diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-err.cl index 0c5a39c2c8520..c972b1611b6f7 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-err.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-err.cl @@ -61,13 +61,13 @@ void builtin_test_unsupported(double a_double, float a_float, a_v4f = __builtin_amdgcn_mfma_f32_4x4x2bf16(a_v2s, a_v2s, a_v4f, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_4x4x2bf16' needs target feature mai-insts}} a_v16f = __builtin_amdgcn_mfma_f32_32x32x4bf16(a_v2s, a_v2s, a_v16f, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_32x32x4bf16' needs target feature mai-insts}} a_v4f = __builtin_amdgcn_mfma_f32_16x16x8bf16(a_v2s, a_v2s, a_v4f, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_16x16x8bf16' needs target feature mai-insts}} - a_v32f = __builtin_amdgcn_mfma_f32_32x32x4bf16_1k(a_v4s, a_v4s, a_v32f, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_32x32x4bf16_1k' 
needs target feature mai-insts}} - a_v16f = __builtin_amdgcn_mfma_f32_16x16x4bf16_1k(a_v4s, a_v4s, a_v16f, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_16x16x4bf16_1k' needs target feature mai-insts}} - a_v4f = __builtin_amdgcn_mfma_f32_4x4x4bf16_1k(a_v4s, a_v4s, a_v4f, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_4x4x4bf16_1k' needs target feature mai-insts}} - a_v16f = __builtin_amdgcn_mfma_f32_32x32x8bf16_1k(a_v4s, a_v4s, a_v16f, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_32x32x8bf16_1k' needs target feature mai-insts}} - a_v4f = __builtin_amdgcn_mfma_f32_16x16x16bf16_1k(a_v4s, a_v4s, a_v4f, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_16x16x16bf16_1k' needs target feature mai-insts}} - a_v4d = __builtin_amdgcn_mfma_f64_16x16x4f64(a_double, a_double, a_v4d, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f64_16x16x4f64' needs target feature mai-insts}} - a_double = __builtin_amdgcn_mfma_f64_4x4x4f64(a_double, a_double, a_double, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f64_4x4x4f64' needs target feature mai-insts}} + a_v32f = __builtin_amdgcn_mfma_f32_32x32x4bf16_1k(a_v4s, a_v4s, a_v32f, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_32x32x4bf16_1k' needs target feature gfx90a-insts}} + a_v16f = __builtin_amdgcn_mfma_f32_16x16x4bf16_1k(a_v4s, a_v4s, a_v16f, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_16x16x4bf16_1k' needs target feature gfx90a-insts}} + a_v4f = __builtin_amdgcn_mfma_f32_4x4x4bf16_1k(a_v4s, a_v4s, a_v4f, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_4x4x4bf16_1k' needs target feature gfx90a-insts}} + a_v16f = __builtin_amdgcn_mfma_f32_32x32x8bf16_1k(a_v4s, a_v4s, a_v16f, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_32x32x8bf16_1k' needs target feature gfx90a-insts}} + a_v4f = __builtin_amdgcn_mfma_f32_16x16x16bf16_1k(a_v4s, a_v4s, a_v4f, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_16x16x16bf16_1k' needs target feature 
gfx90a-insts}} + a_v4d = __builtin_amdgcn_mfma_f64_16x16x4f64(a_double, a_double, a_v4d, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f64_16x16x4f64' needs target feature gfx90a-insts}} + a_double = __builtin_amdgcn_mfma_f64_4x4x4f64(a_double, a_double, a_double, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f64_4x4x4f64' needs target feature gfx90a-insts}} a_v4i = __builtin_amdgcn_mfma_i32_16x16x32_i8(a_long, a_long, a_v4i, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_i32_16x16x32_i8' needs target feature mai-insts}} a_v16i = __builtin_amdgcn_mfma_i32_32x32x16_i8(a_long, a_long, a_v16i, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_i32_32x32x16_i8' needs target feature mai-insts}} a_v4f = __builtin_amdgcn_mfma_f32_16x16x8_xf32(a_v2f, a_v2f, a_v4f, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_16x16x8_xf32' needs target feature mai-insts}} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma-gfx908-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma-gfx908-err.cl new file mode 100644 index 0000000000000..4e5baa51b08b8 --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma-gfx908-err.cl @@ -0,0 +1,33 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx908 \ +// RUN: -verify -S -o - %s + +#pragma OPENCL EXTENSION cl_khr_fp64:enable + +typedef float v4f __attribute__((ext_vector_type(4))); +typedef float v16f __attribute__((ext_vector_type(16))); +typedef float v32f __attribute__((ext_vector_type(32))); +typedef half v4h __attribute__((ext_vector_type(4))); +typedef half v16h __attribute__((ext_vector_type(16))); +typedef half v32h __attribute__((ext_vector_type(32))); +typedef int v4i __attribute__((ext_vector_type(4))); +typedef int v16i __attribute__((ext_vector_type(16))); +typedef int v32i __attribute__((ext_vector_type(32))); +typedef short v2s __attribute__((ext_vector_type(2))); +typedef short v4s 
__attribute__((ext_vector_type(4))); +typedef short v16s __attribute__((ext_vector_type(16))); +typedef short v32s __attribute__((ext_vector_type(32))); +typedef double v4d __attribute__((ext_vector_type(4))); + +void test_mfma_f32_16x16x4bf16_1k(global v16f* out, global v4f* out1, + global v4d* out2, global double* out3, v4s a, + v4s b, v16f c, v4f e, double f, double g, + v4d h) +{ + *out = __builtin_amdgcn_mfma_f32_16x16x4bf16_1k(a, b, c, 0, 0, 0); // expected-error{{'__builtin_amdgcn_mfma_f32_16x16x4bf16_1k' needs target feature gfx90a-insts}} + *out1 = __builtin_amdgcn_mfma_f32_4x4x4bf16_1k(a, b, e, 0, 0, 0); // expected-error{{'__builtin_amdgcn_mfma_f32_4x4x4bf16_1k' needs target feature gfx90a-insts}} + *out = __builtin_amdgcn_mfma_f32_32x32x8bf16_1k(a, b, c, 0, 0, 0); // expected-error{{'__builtin_amdgcn_mfma_f32_32x32x8bf16_1k' needs target feature gfx90a-insts}} + *out1 = __builtin_amdgcn_mfma_f32_16x16x16bf16_1k(a, b, e, 0, 0, 0); // expected-error{{'__builtin_amdgcn_mfma_f32_16x16x16bf16_1k' needs target feature gfx90a-insts}} + *out2 = __builtin_amdgcn_mfma_f64_16x16x4f64(f, g, h, 0, 0, 0); // expected-error{{'__builtin_amdgcn_mfma_f64_16x16x4f64' needs target feature gfx90a-insts}} + *out3 = __builtin_amdgcn_mfma_f64_4x4x4f64(f, g, g, 0, 0, 0); // expected-error{{'__builtin_amdgcn_mfma_f64_4x4x4f64' needs target feature gfx90a-insts}} +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
