[llvm-branch-commits] [clang] [llvm] AMDGPU: Add v_smfmac_i32_16x16x128_i8 for gfx950 (PR #117213)

2024-11-21 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian edited 
https://github.com/llvm/llvm-project/pull/117213
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] AMDGPU: Add v_smfmac_i32_16x16x128_i8 for gfx950 (PR #117213)

2024-11-21 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian approved this pull request.


https://github.com/llvm/llvm-project/pull/117213
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] AMDGPU: Add v_smfmac_i32_16x16x128_i8 for gfx950 (PR #117213)

2024-11-21 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-ir

Author: Matt Arsenault (arsenm)


Changes



---

Patch is 24.76 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/117213.diff


12 Files Affected:

- (modified) clang/include/clang/Basic/BuiltinsAMDGPU.def (+1) 
- (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl (+7) 
- (modified) clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl (+6) 
- (modified) clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl (+1) 
- (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+1) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (+4) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (+2-1) 
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+1) 
- (modified) llvm/lib/Target/AMDGPU/VOP3PInstructions.td (+3) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll (+215) 
- (modified) llvm/test/MC/AMDGPU/mai-gfx950.s (+36) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx950_mai.txt (+22) 


```diff
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index e93f570a6353b5..6548ea8df67ee0 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -448,6 +448,7 @@ TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x64_f16, 
"V4fV8hV16hV4fiIiIi", "
 TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x32_f16, 
"V16fV8hV16hV16fiIiIi", "nc", "gfx950-insts")
 TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x64_bf16, 
"V4fV8yV16yV4fiIiIi", "nc", "gfx950-insts")
 TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x32_bf16, 
"V16fV8yV16yV16fiIiIi", "nc", "gfx950-insts")
+TARGET_BUILTIN(__builtin_amdgcn_smfmac_i32_16x16x128_i8, "V4iV4iV8iV4iiIiIi", 
"nc", "gfx950-insts")
 
 
 //===----------------------------------------------------------------------===//
 // GFX12+ only builtins.
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl
index 1977abaec9d575..a7765d702e92f6 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl
@@ -496,4 +496,11 @@ void test_smfmac_f32_32x32x32_bf16(global v16f* out, 
v8bf16 a, v16bf16 b, v16f c
   *out = __builtin_amdgcn_smfmac_f32_32x32x32_bf16(a, b, c, idx, 0, 0);
 }
 
+// CHECK-GFX950-LABEL: @test_smfmac_i32_16x16x128_i8
+// CHECK-GFX950: call <4 x i32> @llvm.amdgcn.smfmac.i32.16x16x128.i8(<4 x i32> 
%a, <8 x i32> %b, <4 x i32> %c, i32 %idx, i32 0, i32 0)
+void test_smfmac_i32_16x16x128_i8(global v4i* out, v4i a, v8i b, v4i c, int 
idx)
+{
+  *out = __builtin_amdgcn_smfmac_i32_16x16x128_i8(a, b, c, idx, 0, 0);
+}
+
 #endif
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl 
b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl
index a80926c2644851..4ba38daf3502d3 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl
@@ -88,3 +88,9 @@ void test_smfmac_f32_32x32x32_bf16(global float16* out, 
bfloat8 a, bfloat16 b, f
   *out = __builtin_amdgcn_smfmac_f32_32x32x32_bf16(a, b, c, idx, d, 0); // 
expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x32_bf16' must be 
a constant integer}}
   *out = __builtin_amdgcn_smfmac_f32_32x32x32_bf16(a, b, c, idx, 0, d); // 
expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x32_bf16' must be 
a constant integer}}
 }
+
+void test_smfmac_i32_16x16x128_i8(global int4* out, int4 a, int8 b, int4 c, 
int idx, int d)
+{
+  *out = __builtin_amdgcn_smfmac_i32_16x16x128_i8(a, b, c, idx, d, 0); // 
expected-error{{argument to '__builtin_amdgcn_smfmac_i32_16x16x128_i8' must be 
a constant integer}}
+  *out = __builtin_amdgcn_smfmac_i32_16x16x128_i8(a, b, c, idx, 0, d); // 
expected-error{{argument to '__builtin_amdgcn_smfmac_i32_16x16x128_i8' must be 
a constant integer}}
+}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl 
b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl
index 090cd3348ee837..68460e07791fd4 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl
@@ -38,6 +38,7 @@ void test(__global float4* out0, half8 a0, half8 b0, float4 
c0,
   *out7 = __builtin_amdgcn_smfmac_f32_32x32x32_f16(a7, b7, c7, 0, 0, 0); // 
expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x32_f16' needs target feature 
gfx950-insts}}
   *out8 = __builtin_amdgcn_smfmac_f32_16x16x64_bf16(a8, b8, c8, 0, 0, 0); // 
expected-error{{'__builtin_amdgcn_smfmac_f32_16x16x64_bf16' needs target 
feature gfx950-insts}}
   *out9 = __builtin_amdgcn_smfmac_f32_32x32x32_bf16(a9, b9, c9, 0, 0, 0); // 
expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x32_bf16' needs target 
feature gfx950-insts}}
+  *out10 = __builtin_amdgcn_smfmac_i32_16x16x128_i8(a10, b10, c10, 0, 0, 0); 
// expected-error{{'__builtin_amdgcn_smf

[llvm-branch-commits] [clang] [llvm] AMDGPU: Add v_smfmac_i32_16x16x128_i8 for gfx950 (PR #117213)

2024-11-21 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/117213
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] AMDGPU: Add v_smfmac_i32_16x16x128_i8 for gfx950 (PR #117213)

2024-11-21 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is
> open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117213?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#117214** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117214?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117213** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117213?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117213?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a>
* **#117212** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117212?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117211** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117211?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117205** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117205?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117202** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117202?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117055** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117055?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117053** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117053?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117052** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117052?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#116728** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/116728?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#116724** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/116724?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>: 1 other dependent PR ([#117047](https://github.com/llvm/llvm-project/pull/117047) <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117047?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>)
* **#116723** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/116723?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#116722** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/116722?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#116681** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/116681?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#116680** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/116680?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#116679** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/116679?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#116678** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/116678?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#116312** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/116312?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#116311** https://app.graphite.dev/github/pr/llvm/llvm-project/116311?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black

[llvm-branch-commits] [clang] [llvm] AMDGPU: Add v_smfmac_i32_16x16x128_i8 for gfx950 (PR #117213)

2024-11-21 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/117213

None

>From 8e48083967a60dc4a4a7c804a32be9b87ae7746d Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Fri, 2 Feb 2024 15:52:26 +0530
Subject: [PATCH] AMDGPU: Add v_smfmac_i32_16x16x128_i8 for gfx950

---
 clang/include/clang/Basic/BuiltinsAMDGPU.def  |   1 +
 .../CodeGenOpenCL/builtins-amdgcn-mfma.cl |   7 +
 .../builtins-amdgcn-error-gfx950-param.cl |   6 +
 .../builtins-amdgcn-error-gfx950.cl   |   1 +
 llvm/include/llvm/IR/IntrinsicsAMDGPU.td  |   1 +
 .../AMDGPU/AMDGPUInstructionSelector.cpp  |   4 +
 .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp  |   3 +-
 llvm/lib/Target/AMDGPU/SIInstrInfo.td |   1 +
 llvm/lib/Target/AMDGPU/VOP3PInstructions.td   |   3 +
 .../AMDGPU/llvm.amdgcn.smfmac.gfx950.ll   | 215 ++
 llvm/test/MC/AMDGPU/mai-gfx950.s  |  36 +++
 .../MC/Disassembler/AMDGPU/gfx950_mai.txt |  22 ++
 12 files changed, 299 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index e93f570a6353b5..6548ea8df67ee0 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -448,6 +448,7 @@ TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x64_f16, 
"V4fV8hV16hV4fiIiIi", "
 TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x32_f16, 
"V16fV8hV16hV16fiIiIi", "nc", "gfx950-insts")
 TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x64_bf16, 
"V4fV8yV16yV4fiIiIi", "nc", "gfx950-insts")
 TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x32_bf16, 
"V16fV8yV16yV16fiIiIi", "nc", "gfx950-insts")
+TARGET_BUILTIN(__builtin_amdgcn_smfmac_i32_16x16x128_i8, "V4iV4iV8iV4iiIiIi", 
"nc", "gfx950-insts")
 
 
 //===----------------------------------------------------------------------===//
 // GFX12+ only builtins.
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl
index 1977abaec9d575..a7765d702e92f6 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl
@@ -496,4 +496,11 @@ void test_smfmac_f32_32x32x32_bf16(global v16f* out, 
v8bf16 a, v16bf16 b, v16f c
   *out = __builtin_amdgcn_smfmac_f32_32x32x32_bf16(a, b, c, idx, 0, 0);
 }
 
+// CHECK-GFX950-LABEL: @test_smfmac_i32_16x16x128_i8
+// CHECK-GFX950: call <4 x i32> @llvm.amdgcn.smfmac.i32.16x16x128.i8(<4 x i32> 
%a, <8 x i32> %b, <4 x i32> %c, i32 %idx, i32 0, i32 0)
+void test_smfmac_i32_16x16x128_i8(global v4i* out, v4i a, v8i b, v4i c, int 
idx)
+{
+  *out = __builtin_amdgcn_smfmac_i32_16x16x128_i8(a, b, c, idx, 0, 0);
+}
+
 #endif
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl 
b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl
index a80926c2644851..4ba38daf3502d3 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl
@@ -88,3 +88,9 @@ void test_smfmac_f32_32x32x32_bf16(global float16* out, 
bfloat8 a, bfloat16 b, f
   *out = __builtin_amdgcn_smfmac_f32_32x32x32_bf16(a, b, c, idx, d, 0); // 
expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x32_bf16' must be 
a constant integer}}
   *out = __builtin_amdgcn_smfmac_f32_32x32x32_bf16(a, b, c, idx, 0, d); // 
expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x32_bf16' must be 
a constant integer}}
 }
+
+void test_smfmac_i32_16x16x128_i8(global int4* out, int4 a, int8 b, int4 c, 
int idx, int d)
+{
+  *out = __builtin_amdgcn_smfmac_i32_16x16x128_i8(a, b, c, idx, d, 0); // 
expected-error{{argument to '__builtin_amdgcn_smfmac_i32_16x16x128_i8' must be 
a constant integer}}
+  *out = __builtin_amdgcn_smfmac_i32_16x16x128_i8(a, b, c, idx, 0, d); // 
expected-error{{argument to '__builtin_amdgcn_smfmac_i32_16x16x128_i8' must be 
a constant integer}}
+}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl 
b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl
index 090cd3348ee837..68460e07791fd4 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl
@@ -38,6 +38,7 @@ void test(__global float4* out0, half8 a0, half8 b0, float4 
c0,
   *out7 = __builtin_amdgcn_smfmac_f32_32x32x32_f16(a7, b7, c7, 0, 0, 0); // 
expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x32_f16' needs target feature 
gfx950-insts}}
   *out8 = __builtin_amdgcn_smfmac_f32_16x16x64_bf16(a8, b8, c8, 0, 0, 0); // 
expected-error{{'__builtin_amdgcn_smfmac_f32_16x16x64_bf16' needs target 
feature gfx950-insts}}
   *out9 = __builtin_amdgcn_smfmac_f32_32x32x32_bf16(a9, b9, c9, 0, 0, 0); // 
expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x32_bf16' needs target 
feature gfx950-insts}}
+  *out10 = __builtin_amdgcn_smfmac_i32_16x16x128_i8(a10, b10, c10, 0, 0, 0); 
// expected-error{{'__builtin_amdgcn_smfmac_i32_16x16x128_i8' needs targe