[llvm-branch-commits] [clang] [llvm] AMDGPU: Add v_smfmac_f32_32x32x64_fp8_fp8 for gfx950 (PR #117259)

2024-11-22 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

### Merge activity

* **Nov 22, 2:45 PM EST**: A user started a stack merge that includes this pull 
request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/117259).


https://github.com/llvm/llvm-project/pull/117259
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] AMDGPU: Add v_smfmac_f32_32x32x64_fp8_fp8 for gfx950 (PR #117259)

2024-11-22 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian edited 
https://github.com/llvm/llvm-project/pull/117259
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] AMDGPU: Add v_smfmac_f32_32x32x64_fp8_fp8 for gfx950 (PR #117259)

2024-11-22 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian approved this pull request.


https://github.com/llvm/llvm-project/pull/117259
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] AMDGPU: Add v_smfmac_f32_32x32x64_fp8_fp8 for gfx950 (PR #117259)

2024-11-21 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/117259

>From d5b3bb6210d19c81a935790c5267c3d97125a00d Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Sat, 3 Feb 2024 21:43:00 +0530
Subject: [PATCH] AMDGPU: Add v_smfmac_f32_32x32x64_fp8_fp8 for gfx950

---
 clang/include/clang/Basic/BuiltinsAMDGPU.def  |   1 +
 .../CodeGenOpenCL/builtins-amdgcn-mfma.cl |   7 +
 .../builtins-amdgcn-error-gfx950-param.cl |   6 +
 .../builtins-amdgcn-error-gfx950.cl   |   1 +
 llvm/include/llvm/IR/IntrinsicsAMDGPU.td  |   1 +
 .../AMDGPU/AMDGPUInstructionSelector.cpp  |   4 +
 .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp  |   3 +-
 llvm/lib/Target/AMDGPU/VOP3PInstructions.td   |   2 +
 .../UniformityAnalysis/AMDGPU/intrinsics.ll   |   9 +
 .../AMDGPU/llvm.amdgcn.smfmac.gfx950.ll   | 414 ++
 llvm/test/MC/AMDGPU/mai-gfx950.s  |  36 ++
 .../MC/Disassembler/AMDGPU/gfx950_mai.txt |  22 +
 12 files changed, 505 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index f90af7000e3196..51a5b1dbad495c 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -457,6 +457,7 @@ 
TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x128_fp8_fp8, "V4fV4iV8iV4fiIiIi
 TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_bf8_bf8, 
"V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts")
 TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8, 
"V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts")
 TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8, 
"V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts")
+TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8, 
"V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts")
 
 
//===--===//
 // GFX12+ only builtins.
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl
index 33b60d53f11cc8..00346baa6ff84d 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl
@@ -559,4 +559,11 @@ void test_smfmac_f32_32x32x64_fp8_bf8(global v16f* out, 
v4i a, v8i b, v16f c, in
   *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a, b, c, idx, 0, 0);
 }
 
+// CHECK-GFX950-LABEL: @test_smfmac_f32_32x32x64_fp8_fp8
+// CHECK-GFX950: call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.fp8.fp8(<4 
x i32> %a, <8 x i32> %b, <16 x float> %c, i32 %idx, i32 0, i32 0)
+void test_smfmac_f32_32x32x64_fp8_fp8(global v16f* out, v4i a, v8i b, v16f c, 
int idx)
+{
+  *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8(a, b, c, idx, 0, 0);
+}
+
 #endif
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl 
b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl
index c53ca8a7c3513f..b3b359a1e0c65b 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl
@@ -142,3 +142,9 @@ void test_smfmac_f32_32x32x64_fp8_bf8(global float16* out, 
int4 a, int8 b, float
   *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a, b, c, idx, d, 0); // 
expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8' must 
be a constant integer}}
   *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a, b, c, idx, 0, d); // 
expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8' must 
be a constant integer}}
 }
+
+void test_smfmac_f32_32x32x64_fp8_fp8(global float16* out, int4 a, int8 b, 
float16 c, int idx, int d)
+{
+  *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8(a, b, c, idx, d, 0); // 
expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8' must 
be a constant integer}}
+  *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8(a, b, c, idx, 0, d); // 
expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8' must 
be a constant integer}}
+}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl 
b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl
index 9e563a7b0bd64c..57523cf0af1b18 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl
@@ -47,6 +47,7 @@ void test(__global float4* out0, half8 a0, half8 b0, float4 
c0,
   *out13 = __builtin_amdgcn_smfmac_f32_32x32x64_bf8_bf8(a13, b13, c13, 0, 0, 
0); // expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x64_bf8_bf8' needs 
target feature gfx950-insts}}
   *out13 = __builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8(a13, b13, c13, 0, 0, 
0); // expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8' needs 
target feature gfx950-insts}}
   *out13 = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a13, b13, c13, 0, 0, 
0); // expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8' needs 
target feature gfx950-insts}}
+  *out13 = __builtin_amdgcn_smfmac_f32_32

[llvm-branch-commits] [clang] [llvm] AMDGPU: Add v_smfmac_f32_32x32x64_fp8_fp8 for gfx950 (PR #117259)

2024-11-21 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/117259

>From d36a1301eb84377617c35c125e136230327eb3e9 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Sat, 3 Feb 2024 21:43:00 +0530
Subject: [PATCH] AMDGPU: Add v_smfmac_f32_32x32x64_fp8_fp8 for gfx950

---
 clang/include/clang/Basic/BuiltinsAMDGPU.def  |   1 +
 .../CodeGenOpenCL/builtins-amdgcn-mfma.cl |   7 +
 .../builtins-amdgcn-error-gfx950-param.cl |   6 +
 .../builtins-amdgcn-error-gfx950.cl   |   1 +
 llvm/include/llvm/IR/IntrinsicsAMDGPU.td  |   1 +
 .../AMDGPU/AMDGPUInstructionSelector.cpp  |   4 +
 .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp  |   3 +-
 llvm/lib/Target/AMDGPU/VOP3PInstructions.td   |   2 +
 .../UniformityAnalysis/AMDGPU/intrinsics.ll   |   9 +
 .../AMDGPU/llvm.amdgcn.smfmac.gfx950.ll   | 414 ++
 llvm/test/MC/AMDGPU/mai-gfx950.s  |  36 ++
 .../MC/Disassembler/AMDGPU/gfx950_mai.txt |  22 +
 12 files changed, 505 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index f90af7000e3196..51a5b1dbad495c 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -457,6 +457,7 @@ 
TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x128_fp8_fp8, "V4fV4iV8iV4fiIiIi
 TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_bf8_bf8, 
"V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts")
 TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8, 
"V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts")
 TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8, 
"V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts")
+TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8, 
"V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts")
 
 
//===--===//
 // GFX12+ only builtins.
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl
index 33b60d53f11cc8..00346baa6ff84d 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl
@@ -559,4 +559,11 @@ void test_smfmac_f32_32x32x64_fp8_bf8(global v16f* out, 
v4i a, v8i b, v16f c, in
   *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a, b, c, idx, 0, 0);
 }
 
+// CHECK-GFX950-LABEL: @test_smfmac_f32_32x32x64_fp8_fp8
+// CHECK-GFX950: call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.fp8.fp8(<4 
x i32> %a, <8 x i32> %b, <16 x float> %c, i32 %idx, i32 0, i32 0)
+void test_smfmac_f32_32x32x64_fp8_fp8(global v16f* out, v4i a, v8i b, v16f c, 
int idx)
+{
+  *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8(a, b, c, idx, 0, 0);
+}
+
 #endif
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl 
b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl
index c53ca8a7c3513f..b3b359a1e0c65b 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl
@@ -142,3 +142,9 @@ void test_smfmac_f32_32x32x64_fp8_bf8(global float16* out, 
int4 a, int8 b, float
   *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a, b, c, idx, d, 0); // 
expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8' must 
be a constant integer}}
   *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a, b, c, idx, 0, d); // 
expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8' must 
be a constant integer}}
 }
+
+void test_smfmac_f32_32x32x64_fp8_fp8(global float16* out, int4 a, int8 b, 
float16 c, int idx, int d)
+{
+  *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8(a, b, c, idx, d, 0); // 
expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8' must 
be a constant integer}}
+  *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8(a, b, c, idx, 0, d); // 
expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8' must 
be a constant integer}}
+}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl 
b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl
index 9e563a7b0bd64c..57523cf0af1b18 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl
@@ -47,6 +47,7 @@ void test(__global float4* out0, half8 a0, half8 b0, float4 
c0,
   *out13 = __builtin_amdgcn_smfmac_f32_32x32x64_bf8_bf8(a13, b13, c13, 0, 0, 
0); // expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x64_bf8_bf8' needs 
target feature gfx950-insts}}
   *out13 = __builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8(a13, b13, c13, 0, 0, 
0); // expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8' needs 
target feature gfx950-insts}}
   *out13 = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a13, b13, c13, 0, 0, 
0); // expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8' needs 
target feature gfx950-insts}}
+  *out13 = __builtin_amdgcn_smfmac_f32_32

[llvm-branch-commits] [clang] [llvm] AMDGPU: Add v_smfmac_f32_32x32x64_fp8_fp8 for gfx950 (PR #117259)

2024-11-21 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)


Changes



---

Patch is 33.74 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/117259.diff


12 Files Affected:

- (modified) clang/include/clang/Basic/BuiltinsAMDGPU.def (+1) 
- (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl (+7) 
- (modified) clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl (+6) 
- (modified) clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl (+1) 
- (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+1) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (+4) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (+2-1) 
- (modified) llvm/lib/Target/AMDGPU/VOP3PInstructions.td (+2) 
- (modified) llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll (+9) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll (+414) 
- (modified) llvm/test/MC/AMDGPU/mai-gfx950.s (+36) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx950_mai.txt (+22) 


```diff
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index f90af7000e3196..51a5b1dbad495c 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -457,6 +457,7 @@ 
TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x128_fp8_fp8, "V4fV4iV8iV4fiIiIi
 TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_bf8_bf8, 
"V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts")
 TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8, 
"V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts")
 TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8, 
"V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts")
+TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8, 
"V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts")
 
 
//===--===//
 // GFX12+ only builtins.
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl
index 33b60d53f11cc8..00346baa6ff84d 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl
@@ -559,4 +559,11 @@ void test_smfmac_f32_32x32x64_fp8_bf8(global v16f* out, 
v4i a, v8i b, v16f c, in
   *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a, b, c, idx, 0, 0);
 }
 
+// CHECK-GFX950-LABEL: @test_smfmac_f32_32x32x64_fp8_fp8
+// CHECK-GFX950: call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.fp8.fp8(<4 
x i32> %a, <8 x i32> %b, <16 x float> %c, i32 %idx, i32 0, i32 0)
+void test_smfmac_f32_32x32x64_fp8_fp8(global v16f* out, v4i a, v8i b, v16f c, 
int idx)
+{
+  *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8(a, b, c, idx, 0, 0);
+}
+
 #endif
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl 
b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl
index c53ca8a7c3513f..b3b359a1e0c65b 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl
@@ -142,3 +142,9 @@ void test_smfmac_f32_32x32x64_fp8_bf8(global float16* out, 
int4 a, int8 b, float
   *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a, b, c, idx, d, 0); // 
expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8' must 
be a constant integer}}
   *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a, b, c, idx, 0, d); // 
expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8' must 
be a constant integer}}
 }
+
+void test_smfmac_f32_32x32x64_fp8_fp8(global float16* out, int4 a, int8 b, 
float16 c, int idx, int d)
+{
+  *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8(a, b, c, idx, d, 0); // 
expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8' must 
be a constant integer}}
+  *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8(a, b, c, idx, 0, d); // 
expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8' must 
be a constant integer}}
+}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl 
b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl
index 9e563a7b0bd64c..57523cf0af1b18 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl
@@ -47,6 +47,7 @@ void test(__global float4* out0, half8 a0, half8 b0, float4 
c0,
   *out13 = __builtin_amdgcn_smfmac_f32_32x32x64_bf8_bf8(a13, b13, c13, 0, 0, 
0); // expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x64_bf8_bf8' needs 
target feature gfx950-insts}}
   *out13 = __builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8(a13, b13, c13, 0, 0, 
0); // expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8' needs 
target feature gfx950-insts}}
   *out13 = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a13, b13, c13, 0, 0, 
0); // expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8' needs 
target 

[llvm-branch-commits] [clang] [llvm] AMDGPU: Add v_smfmac_f32_32x32x64_fp8_fp8 for gfx950 (PR #117259)

2024-11-21 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/117259
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] AMDGPU: Add v_smfmac_f32_32x32x64_fp8_fp8 for gfx950 (PR #117259)

2024-11-21 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack 
> <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117259?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#117261** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117261?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117260** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117260?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117259** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117259?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117259?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a>
* **#117258** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117258?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117257** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117257?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117256** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117256?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117235** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117235?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117234** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117234?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117233** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117233?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117232** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117232?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117214** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117214?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117213** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117213?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117212** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117212?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117211** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117211?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117205** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117205?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117202** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117202?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117055** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117055?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117053** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117053?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117052** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117052?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#116728** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/116728?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#116724** https

[llvm-branch-commits] [clang] [llvm] AMDGPU: Add v_smfmac_f32_32x32x64_fp8_fp8 for gfx950 (PR #117259)

2024-11-21 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/117259

None

>From 253fcf59235b1bc4edd0b8ce811f900606aa2a67 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Sat, 3 Feb 2024 21:43:00 +0530
Subject: [PATCH] AMDGPU: Add v_smfmac_f32_32x32x64_fp8_fp8 for gfx950

---
 clang/include/clang/Basic/BuiltinsAMDGPU.def  |   1 +
 .../CodeGenOpenCL/builtins-amdgcn-mfma.cl |   7 +
 .../builtins-amdgcn-error-gfx950-param.cl |   6 +
 .../builtins-amdgcn-error-gfx950.cl   |   1 +
 llvm/include/llvm/IR/IntrinsicsAMDGPU.td  |   1 +
 .../AMDGPU/AMDGPUInstructionSelector.cpp  |   4 +
 .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp  |   3 +-
 llvm/lib/Target/AMDGPU/VOP3PInstructions.td   |   2 +
 .../UniformityAnalysis/AMDGPU/intrinsics.ll   |   9 +
 .../AMDGPU/llvm.amdgcn.smfmac.gfx950.ll   | 414 ++
 llvm/test/MC/AMDGPU/mai-gfx950.s  |  36 ++
 .../MC/Disassembler/AMDGPU/gfx950_mai.txt |  22 +
 12 files changed, 505 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index f90af7000e3196..51a5b1dbad495c 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -457,6 +457,7 @@ 
TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x128_fp8_fp8, "V4fV4iV8iV4fiIiIi
 TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_bf8_bf8, 
"V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts")
 TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8, 
"V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts")
 TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8, 
"V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts")
+TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8, 
"V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts")
 
 
//===--===//
 // GFX12+ only builtins.
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl
index 33b60d53f11cc8..00346baa6ff84d 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl
@@ -559,4 +559,11 @@ void test_smfmac_f32_32x32x64_fp8_bf8(global v16f* out, 
v4i a, v8i b, v16f c, in
   *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a, b, c, idx, 0, 0);
 }
 
+// CHECK-GFX950-LABEL: @test_smfmac_f32_32x32x64_fp8_fp8
+// CHECK-GFX950: call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.fp8.fp8(<4 
x i32> %a, <8 x i32> %b, <16 x float> %c, i32 %idx, i32 0, i32 0)
+void test_smfmac_f32_32x32x64_fp8_fp8(global v16f* out, v4i a, v8i b, v16f c, 
int idx)
+{
+  *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8(a, b, c, idx, 0, 0);
+}
+
 #endif
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl 
b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl
index c53ca8a7c3513f..b3b359a1e0c65b 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl
@@ -142,3 +142,9 @@ void test_smfmac_f32_32x32x64_fp8_bf8(global float16* out, 
int4 a, int8 b, float
   *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a, b, c, idx, d, 0); // 
expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8' must 
be a constant integer}}
   *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a, b, c, idx, 0, d); // 
expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8' must 
be a constant integer}}
 }
+
+void test_smfmac_f32_32x32x64_fp8_fp8(global float16* out, int4 a, int8 b, 
float16 c, int idx, int d)
+{
+  *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8(a, b, c, idx, d, 0); // 
expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8' must 
be a constant integer}}
+  *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8(a, b, c, idx, 0, d); // 
expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8' must 
be a constant integer}}
+}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl 
b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl
index 9e563a7b0bd64c..57523cf0af1b18 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl
@@ -47,6 +47,7 @@ void test(__global float4* out0, half8 a0, half8 b0, float4 
c0,
   *out13 = __builtin_amdgcn_smfmac_f32_32x32x64_bf8_bf8(a13, b13, c13, 0, 0, 
0); // expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x64_bf8_bf8' needs 
target feature gfx950-insts}}
   *out13 = __builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8(a13, b13, c13, 0, 0, 
0); // expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8' needs 
target feature gfx950-insts}}
   *out13 = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a13, b13, c13, 0, 0, 
0); // expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8' needs 
target feature gfx950-insts}}
+  *out13 = __builtin_amdgcn_smfmac_