Author: Alex Voicu
Date: 2026-04-11T11:12:48+01:00
New Revision: 870f8d9edee7bf550fe12d3d6e25d209bb9b8608

URL: 
https://github.com/llvm/llvm-project/commit/870f8d9edee7bf550fe12d3d6e25d209bb9b8608
DIFF: 
https://github.com/llvm/llvm-project/commit/870f8d9edee7bf550fe12d3d6e25d209bb9b8608.diff

LOG: [NFC][AMDGPU] Fix `gfx90a`+ MFMA builtins (#191537)

`gfx90a` added a set of MFMA instructions that are not available on
prior GFXIPs. The Clang builtins for these required the `mai-insts`
feature, which is incorrect: `gfx908` has `mai-insts` but does not
support the added MFMAs. This led to opaque bugs where we'd check with
`__has_builtin` for the availability of the builtin while targeting
`gfx908`, see it reported as available, and then get an ISEL failure.

Added: 
    clang/test/CodeGenOpenCL/builtins-amdgcn-mfma-gfx908-err.cl

Modified: 
    clang/include/clang/Basic/BuiltinsAMDGPU.td
    clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-err.cl

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/BuiltinsAMDGPU.td 
b/clang/include/clang/Basic/BuiltinsAMDGPU.td
index 17654daf6a469..db0fbb4b048e2 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.td
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.td
@@ -603,13 +603,13 @@ def __builtin_amdgcn_mfma_f32_4x4x2bf16 : 
AMDGPUBuiltin<"_ExtVector<4, float>(_E
 def __builtin_amdgcn_mfma_f32_32x32x4bf16 : AMDGPUBuiltin<"_ExtVector<16, 
float>(_ExtVector<2, short>, _ExtVector<2, short>, _ExtVector<16, float>, 
_Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
 def __builtin_amdgcn_mfma_f32_16x16x8bf16 : AMDGPUBuiltin<"_ExtVector<4, 
float>(_ExtVector<2, short>, _ExtVector<2, short>, _ExtVector<4, float>, 
_Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
 
-def __builtin_amdgcn_mfma_f32_32x32x4bf16_1k : AMDGPUBuiltin<"_ExtVector<32, 
float>(_ExtVector<4, short>, _ExtVector<4, short>, _ExtVector<32, float>, 
_Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
-def __builtin_amdgcn_mfma_f32_16x16x4bf16_1k : AMDGPUBuiltin<"_ExtVector<16, 
float>(_ExtVector<4, short>, _ExtVector<4, short>, _ExtVector<16, float>, 
_Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
-def __builtin_amdgcn_mfma_f32_4x4x4bf16_1k : AMDGPUBuiltin<"_ExtVector<4, 
float>(_ExtVector<4, short>, _ExtVector<4, short>, _ExtVector<4, float>, 
_Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
-def __builtin_amdgcn_mfma_f32_32x32x8bf16_1k : AMDGPUBuiltin<"_ExtVector<16, 
float>(_ExtVector<4, short>, _ExtVector<4, short>, _ExtVector<16, float>, 
_Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
-def __builtin_amdgcn_mfma_f32_16x16x16bf16_1k : AMDGPUBuiltin<"_ExtVector<4, 
float>(_ExtVector<4, short>, _ExtVector<4, short>, _ExtVector<4, float>, 
_Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
-def __builtin_amdgcn_mfma_f64_16x16x4f64 : AMDGPUBuiltin<"_ExtVector<4, 
double>(double, double, _ExtVector<4, double>, _Constant int, _Constant int, 
_Constant int)", [Const], "mai-insts">;
-def __builtin_amdgcn_mfma_f64_4x4x4f64 : AMDGPUBuiltin<"double(double, double, 
double, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_mfma_f32_32x32x4bf16_1k : AMDGPUBuiltin<"_ExtVector<32, 
float>(_ExtVector<4, short>, _ExtVector<4, short>, _ExtVector<32, float>, 
_Constant int, _Constant int, _Constant int)", [Const], "gfx90a-insts">;
+def __builtin_amdgcn_mfma_f32_16x16x4bf16_1k : AMDGPUBuiltin<"_ExtVector<16, 
float>(_ExtVector<4, short>, _ExtVector<4, short>, _ExtVector<16, float>, 
_Constant int, _Constant int, _Constant int)", [Const], "gfx90a-insts">;
+def __builtin_amdgcn_mfma_f32_4x4x4bf16_1k : AMDGPUBuiltin<"_ExtVector<4, 
float>(_ExtVector<4, short>, _ExtVector<4, short>, _ExtVector<4, float>, 
_Constant int, _Constant int, _Constant int)", [Const], "gfx90a-insts">;
+def __builtin_amdgcn_mfma_f32_32x32x8bf16_1k : AMDGPUBuiltin<"_ExtVector<16, 
float>(_ExtVector<4, short>, _ExtVector<4, short>, _ExtVector<16, float>, 
_Constant int, _Constant int, _Constant int)", [Const], "gfx90a-insts">;
+def __builtin_amdgcn_mfma_f32_16x16x16bf16_1k : AMDGPUBuiltin<"_ExtVector<4, 
float>(_ExtVector<4, short>, _ExtVector<4, short>, _ExtVector<4, float>, 
_Constant int, _Constant int, _Constant int)", [Const], "gfx90a-insts">;
+def __builtin_amdgcn_mfma_f64_16x16x4f64 : AMDGPUBuiltin<"_ExtVector<4, 
double>(double, double, _ExtVector<4, double>, _Constant int, _Constant int, 
_Constant int)", [Const], "gfx90a-insts">;
+def __builtin_amdgcn_mfma_f64_4x4x4f64 : AMDGPUBuiltin<"double(double, double, 
double, _Constant int, _Constant int, _Constant int)", [Const], "gfx90a-insts">;
 
 def __builtin_amdgcn_mfma_i32_16x16x32_i8 : AMDGPUBuiltin<"_ExtVector<4, 
int>(int64_t, int64_t, _ExtVector<4, int>, _Constant int, _Constant int, 
_Constant int)", [Const], "mai-insts">;
 def __builtin_amdgcn_mfma_i32_32x32x16_i8 : AMDGPUBuiltin<"_ExtVector<16, 
int>(int64_t, int64_t, _ExtVector<16, int>, _Constant int, _Constant int, 
_Constant int)", [Const], "mai-insts">;

diff  --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-err.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-err.cl
index 0c5a39c2c8520..c972b1611b6f7 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-err.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-err.cl
@@ -61,13 +61,13 @@ void builtin_test_unsupported(double a_double, float 
a_float,
   a_v4f = __builtin_amdgcn_mfma_f32_4x4x2bf16(a_v2s, a_v2s, a_v4f, 0, 0, 0); 
// expected-error {{'__builtin_amdgcn_mfma_f32_4x4x2bf16' needs target feature 
mai-insts}}
   a_v16f = __builtin_amdgcn_mfma_f32_32x32x4bf16(a_v2s, a_v2s, a_v16f, 0, 0, 
0); // expected-error {{'__builtin_amdgcn_mfma_f32_32x32x4bf16' needs target 
feature mai-insts}}
   a_v4f = __builtin_amdgcn_mfma_f32_16x16x8bf16(a_v2s, a_v2s, a_v4f, 0, 0, 0); 
// expected-error {{'__builtin_amdgcn_mfma_f32_16x16x8bf16' needs target 
feature mai-insts}}
-  a_v32f = __builtin_amdgcn_mfma_f32_32x32x4bf16_1k(a_v4s, a_v4s, a_v32f, 0, 
0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_32x32x4bf16_1k' needs 
target feature mai-insts}}
-  a_v16f = __builtin_amdgcn_mfma_f32_16x16x4bf16_1k(a_v4s, a_v4s, a_v16f, 0, 
0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_16x16x4bf16_1k' needs 
target feature mai-insts}}
-  a_v4f = __builtin_amdgcn_mfma_f32_4x4x4bf16_1k(a_v4s, a_v4s, a_v4f, 0, 0, 
0); // expected-error {{'__builtin_amdgcn_mfma_f32_4x4x4bf16_1k' needs target 
feature mai-insts}}
-  a_v16f = __builtin_amdgcn_mfma_f32_32x32x8bf16_1k(a_v4s, a_v4s, a_v16f, 0, 
0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_32x32x8bf16_1k' needs 
target feature mai-insts}}
-  a_v4f = __builtin_amdgcn_mfma_f32_16x16x16bf16_1k(a_v4s, a_v4s, a_v4f, 0, 0, 
0); // expected-error {{'__builtin_amdgcn_mfma_f32_16x16x16bf16_1k' needs 
target feature mai-insts}}
-  a_v4d = __builtin_amdgcn_mfma_f64_16x16x4f64(a_double, a_double, a_v4d, 0, 
0, 0); // expected-error {{'__builtin_amdgcn_mfma_f64_16x16x4f64' needs target 
feature mai-insts}}
-  a_double = __builtin_amdgcn_mfma_f64_4x4x4f64(a_double, a_double, a_double, 
0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f64_4x4x4f64' needs target 
feature mai-insts}}
+  a_v32f = __builtin_amdgcn_mfma_f32_32x32x4bf16_1k(a_v4s, a_v4s, a_v32f, 0, 
0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_32x32x4bf16_1k' needs 
target feature gfx90a-insts}}
+  a_v16f = __builtin_amdgcn_mfma_f32_16x16x4bf16_1k(a_v4s, a_v4s, a_v16f, 0, 
0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_16x16x4bf16_1k' needs 
target feature gfx90a-insts}}
+  a_v4f = __builtin_amdgcn_mfma_f32_4x4x4bf16_1k(a_v4s, a_v4s, a_v4f, 0, 0, 
0); // expected-error {{'__builtin_amdgcn_mfma_f32_4x4x4bf16_1k' needs target 
feature gfx90a-insts}}
+  a_v16f = __builtin_amdgcn_mfma_f32_32x32x8bf16_1k(a_v4s, a_v4s, a_v16f, 0, 
0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_32x32x8bf16_1k' needs 
target feature gfx90a-insts}}
+  a_v4f = __builtin_amdgcn_mfma_f32_16x16x16bf16_1k(a_v4s, a_v4s, a_v4f, 0, 0, 
0); // expected-error {{'__builtin_amdgcn_mfma_f32_16x16x16bf16_1k' needs 
target feature gfx90a-insts}}
+  a_v4d = __builtin_amdgcn_mfma_f64_16x16x4f64(a_double, a_double, a_v4d, 0, 
0, 0); // expected-error {{'__builtin_amdgcn_mfma_f64_16x16x4f64' needs target 
feature gfx90a-insts}}
+  a_double = __builtin_amdgcn_mfma_f64_4x4x4f64(a_double, a_double, a_double, 
0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f64_4x4x4f64' needs target 
feature gfx90a-insts}}
   a_v4i = __builtin_amdgcn_mfma_i32_16x16x32_i8(a_long, a_long, a_v4i, 0, 0, 
0); // expected-error {{'__builtin_amdgcn_mfma_i32_16x16x32_i8' needs target 
feature mai-insts}}
   a_v16i = __builtin_amdgcn_mfma_i32_32x32x16_i8(a_long, a_long, a_v16i, 0, 0, 
0); // expected-error {{'__builtin_amdgcn_mfma_i32_32x32x16_i8' needs target 
feature mai-insts}}
   a_v4f = __builtin_amdgcn_mfma_f32_16x16x8_xf32(a_v2f, a_v2f, a_v4f, 0, 0, 
0); // expected-error {{'__builtin_amdgcn_mfma_f32_16x16x8_xf32' needs target 
feature mai-insts}}

diff  --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma-gfx908-err.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma-gfx908-err.cl
new file mode 100644
index 0000000000000..4e5baa51b08b8
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma-gfx908-err.cl
@@ -0,0 +1,33 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu 
gfx908 \
+// RUN:   -verify -S -o - %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
+
+typedef float  v4f   __attribute__((ext_vector_type(4)));
+typedef float  v16f  __attribute__((ext_vector_type(16)));
+typedef float  v32f  __attribute__((ext_vector_type(32)));
+typedef half   v4h   __attribute__((ext_vector_type(4)));
+typedef half   v16h  __attribute__((ext_vector_type(16)));
+typedef half   v32h  __attribute__((ext_vector_type(32)));
+typedef int    v4i   __attribute__((ext_vector_type(4)));
+typedef int    v16i  __attribute__((ext_vector_type(16)));
+typedef int    v32i  __attribute__((ext_vector_type(32)));
+typedef short  v2s   __attribute__((ext_vector_type(2)));
+typedef short  v4s   __attribute__((ext_vector_type(4)));
+typedef short  v16s  __attribute__((ext_vector_type(16)));
+typedef short  v32s  __attribute__((ext_vector_type(32)));
+typedef double v4d   __attribute__((ext_vector_type(4)));
+
+void test_mfma_f32_16x16x4bf16_1k(global v16f* out, global v4f* out1,
+                                  global v4d* out2, global double* out3, v4s a,
+                                  v4s b, v16f c, v4f e, double f, double g,
+                                  v4d h)
+{
+  *out = __builtin_amdgcn_mfma_f32_16x16x4bf16_1k(a, b, c, 0, 0, 0);   // 
expected-error{{'__builtin_amdgcn_mfma_f32_16x16x4bf16_1k' needs target feature 
gfx90a-insts}}
+  *out1 = __builtin_amdgcn_mfma_f32_4x4x4bf16_1k(a, b, e, 0, 0, 0);    // 
expected-error{{'__builtin_amdgcn_mfma_f32_4x4x4bf16_1k' needs target feature 
gfx90a-insts}}
+  *out = __builtin_amdgcn_mfma_f32_32x32x8bf16_1k(a, b, c, 0, 0, 0);   // 
expected-error{{'__builtin_amdgcn_mfma_f32_32x32x8bf16_1k' needs target feature 
gfx90a-insts}}
+  *out1 = __builtin_amdgcn_mfma_f32_16x16x16bf16_1k(a, b, e, 0, 0, 0); // 
expected-error{{'__builtin_amdgcn_mfma_f32_16x16x16bf16_1k' needs target 
feature gfx90a-insts}}
+  *out2 = __builtin_amdgcn_mfma_f64_16x16x4f64(f, g, h, 0, 0, 0);      // 
expected-error{{'__builtin_amdgcn_mfma_f64_16x16x4f64' needs target feature 
gfx90a-insts}}
+  *out3 = __builtin_amdgcn_mfma_f64_4x4x4f64(f, g, g, 0, 0, 0);        // 
expected-error{{'__builtin_amdgcn_mfma_f64_4x4x4f64' needs target feature 
gfx90a-insts}}
+}


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to