[llvm-branch-commits] [clang] [llvm] AMDGPU: Add v_smfmac_i32_16x16x128_i8 for gfx950 (PR #117213)
https://github.com/shiltian edited https://github.com/llvm/llvm-project/pull/117213 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] AMDGPU: Add v_smfmac_i32_16x16x128_i8 for gfx950 (PR #117213)
https://github.com/shiltian approved this pull request. https://github.com/llvm/llvm-project/pull/117213 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] AMDGPU: Add v_smfmac_i32_16x16x128_i8 for gfx950 (PR #117213)
llvmbot wrote: @llvm/pr-subscribers-llvm-ir Author: Matt Arsenault (arsenm) Changes --- Patch is 24.76 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/117213.diff 12 Files Affected: - (modified) clang/include/clang/Basic/BuiltinsAMDGPU.def (+1) - (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl (+7) - (modified) clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl (+6) - (modified) clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl (+1) - (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+1) - (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (+4) - (modified) llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (+2-1) - (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+1) - (modified) llvm/lib/Target/AMDGPU/VOP3PInstructions.td (+3) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll (+215) - (modified) llvm/test/MC/AMDGPU/mai-gfx950.s (+36) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx950_mai.txt (+22) ``diff diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index e93f570a6353b5..6548ea8df67ee0 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -448,6 +448,7 @@ TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x64_f16, "V4fV8hV16hV4fiIiIi", " TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x32_f16, "V16fV8hV16hV16fiIiIi", "nc", "gfx950-insts") TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x64_bf16, "V4fV8yV16yV4fiIiIi", "nc", "gfx950-insts") TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x32_bf16, "V16fV8yV16yV16fiIiIi", "nc", "gfx950-insts") +TARGET_BUILTIN(__builtin_amdgcn_smfmac_i32_16x16x128_i8, "V4iV4iV8iV4iiIiIi", "nc", "gfx950-insts") //===--===// // GFX12+ only builtins. diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl index 1977abaec9d575..a7765d702e92f6 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl @@ -496,4 +496,11 @@ void test_smfmac_f32_32x32x32_bf16(global v16f* out, v8bf16 a, v16bf16 b, v16f c *out = __builtin_amdgcn_smfmac_f32_32x32x32_bf16(a, b, c, idx, 0, 0); } +// CHECK-GFX950-LABEL: @test_smfmac_i32_16x16x128_i8 +// CHECK-GFX950: call <4 x i32> @llvm.amdgcn.smfmac.i32.16x16x128.i8(<4 x i32> %a, <8 x i32> %b, <4 x i32> %c, i32 %idx, i32 0, i32 0) +void test_smfmac_i32_16x16x128_i8(global v4i* out, v4i a, v8i b, v4i c, int idx) +{ + *out = __builtin_amdgcn_smfmac_i32_16x16x128_i8(a, b, c, idx, 0, 0); +} + #endif diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl index a80926c2644851..4ba38daf3502d3 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl @@ -88,3 +88,9 @@ void test_smfmac_f32_32x32x32_bf16(global float16* out, bfloat8 a, bfloat16 b, f *out = __builtin_amdgcn_smfmac_f32_32x32x32_bf16(a, b, c, idx, d, 0); // expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x32_bf16' must be a constant integer}} *out = __builtin_amdgcn_smfmac_f32_32x32x32_bf16(a, b, c, idx, 0, d); // expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x32_bf16' must be a constant integer}} } + +void test_smfmac_i32_16x16x128_i8(global int4* out, int4 a, int8 b, int4 c, int idx, int d) +{ + *out = __builtin_amdgcn_smfmac_i32_16x16x128_i8(a, b, c, idx, d, 0); // expected-error{{argument to '__builtin_amdgcn_smfmac_i32_16x16x128_i8' must be a constant integer}} + *out = __builtin_amdgcn_smfmac_i32_16x16x128_i8(a, b, c, idx, 0, d); // expected-error{{argument to '__builtin_amdgcn_smfmac_i32_16x16x128_i8' must be a constant integer}} +} diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl index 090cd3348ee837..68460e07791fd4 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl @@ -38,6 +38,7 @@ void test(__global float4* out0, half8 a0, half8 b0, float4 c0, *out7 = __builtin_amdgcn_smfmac_f32_32x32x32_f16(a7, b7, c7, 0, 0, 0); // expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x32_f16' needs target feature gfx950-insts}} *out8 = __builtin_amdgcn_smfmac_f32_16x16x64_bf16(a8, b8, c8, 0, 0, 0); // expected-error{{'__builtin_amdgcn_smfmac_f32_16x16x64_bf16' needs target feature gfx950-insts}} *out9 = __builtin_amdgcn_smfmac_f32_32x32x32_bf16(a9, b9, c9, 0, 0, 0); // expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x32_bf16' needs target feature gfx950-insts}} + *out10 = __builtin_amdgcn_smfmac_i32_16x16x128_i8(a10, b10, c10, 0, 0, 0); // expected-error{{'__builtin_amdgcn_smf
[llvm-branch-commits] [clang] [llvm] AMDGPU: Add v_smfmac_i32_16x16x128_i8 for gfx950 (PR #117213)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/117213 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] AMDGPU: Add v_smfmac_i32_16x16x128_i8 for gfx950 (PR #117213)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/117213?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#117214** https://app.graphite.dev/github/pr/llvm/llvm-project/117214?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#117213** https://app.graphite.dev/github/pr/llvm/llvm-project/117213?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 https://app.graphite.dev/github/pr/llvm/llvm-project/117213?utm_source=stack-comment-view-in-graphite"; target="_blank">(View in Graphite) * **#117212** https://app.graphite.dev/github/pr/llvm/llvm-project/117212?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#117211** https://app.graphite.dev/github/pr/llvm/llvm-project/117211?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#117205** https://app.graphite.dev/github/pr/llvm/llvm-project/117205?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#117202** https://app.graphite.dev/github/pr/llvm/llvm-project/117202?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#117055** https://app.graphite.dev/github/pr/llvm/llvm-project/117055?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#117053** https://app.graphite.dev/github/pr/llvm/llvm-project/117053?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#117052** https://app.graphite.dev/github/pr/llvm/llvm-project/117052?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#116728** https://app.graphite.dev/github/pr/llvm/llvm-project/116728?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#116724** https://app.graphite.dev/github/pr/llvm/llvm-project/116724?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/>: 1 other dependent PR ([#117047](https://github.com/llvm/llvm-project/pull/117047) https://app.graphite.dev/github/pr/llvm/llvm-project/117047?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/>) * **#116723** https://app.graphite.dev/github/pr/llvm/llvm-project/116723?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#116722** https://app.graphite.dev/github/pr/llvm/llvm-project/116722?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#116681** https://app.graphite.dev/github/pr/llvm/llvm-project/116681?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#116680** https://app.graphite.dev/github/pr/llvm/llvm-project/116680?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#116679** https://app.graphite.dev/github/pr/llvm/llvm-project/116679?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#116678** https://app.graphite.dev/github/pr/llvm/llvm-project/116678?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#116312** https://app.graphite.dev/github/pr/llvm/llvm-project/116312?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#116311** https://app.graphite.dev/github/pr/llvm/llvm-project/116311?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black
[llvm-branch-commits] [clang] [llvm] AMDGPU: Add v_smfmac_i32_16x16x128_i8 for gfx950 (PR #117213)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/117213 None >From 8e48083967a60dc4a4a7c804a32be9b87ae7746d Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 2 Feb 2024 15:52:26 +0530 Subject: [PATCH] AMDGPU: Add v_smfmac_i32_16x16x128_i8 for gfx950 --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 1 + .../CodeGenOpenCL/builtins-amdgcn-mfma.cl | 7 + .../builtins-amdgcn-error-gfx950-param.cl | 6 + .../builtins-amdgcn-error-gfx950.cl | 1 + llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 1 + .../AMDGPU/AMDGPUInstructionSelector.cpp | 4 + .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 3 +- llvm/lib/Target/AMDGPU/SIInstrInfo.td | 1 + llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 3 + .../AMDGPU/llvm.amdgcn.smfmac.gfx950.ll | 215 ++ llvm/test/MC/AMDGPU/mai-gfx950.s | 36 +++ .../MC/Disassembler/AMDGPU/gfx950_mai.txt | 22 ++ 12 files changed, 299 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index e93f570a6353b5..6548ea8df67ee0 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -448,6 +448,7 @@ TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x64_f16, "V4fV8hV16hV4fiIiIi", " TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x32_f16, "V16fV8hV16hV16fiIiIi", "nc", "gfx950-insts") TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x64_bf16, "V4fV8yV16yV4fiIiIi", "nc", "gfx950-insts") TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x32_bf16, "V16fV8yV16yV16fiIiIi", "nc", "gfx950-insts") +TARGET_BUILTIN(__builtin_amdgcn_smfmac_i32_16x16x128_i8, "V4iV4iV8iV4iiIiIi", "nc", "gfx950-insts") //===--===// // GFX12+ only builtins. diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl index 1977abaec9d575..a7765d702e92f6 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl @@ -496,4 +496,11 @@ void test_smfmac_f32_32x32x32_bf16(global v16f* out, v8bf16 a, v16bf16 b, v16f c *out = __builtin_amdgcn_smfmac_f32_32x32x32_bf16(a, b, c, idx, 0, 0); } +// CHECK-GFX950-LABEL: @test_smfmac_i32_16x16x128_i8 +// CHECK-GFX950: call <4 x i32> @llvm.amdgcn.smfmac.i32.16x16x128.i8(<4 x i32> %a, <8 x i32> %b, <4 x i32> %c, i32 %idx, i32 0, i32 0) +void test_smfmac_i32_16x16x128_i8(global v4i* out, v4i a, v8i b, v4i c, int idx) +{ + *out = __builtin_amdgcn_smfmac_i32_16x16x128_i8(a, b, c, idx, 0, 0); +} + #endif diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl index a80926c2644851..4ba38daf3502d3 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl @@ -88,3 +88,9 @@ void test_smfmac_f32_32x32x32_bf16(global float16* out, bfloat8 a, bfloat16 b, f *out = __builtin_amdgcn_smfmac_f32_32x32x32_bf16(a, b, c, idx, d, 0); // expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x32_bf16' must be a constant integer}} *out = __builtin_amdgcn_smfmac_f32_32x32x32_bf16(a, b, c, idx, 0, d); // expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x32_bf16' must be a constant integer}} } + +void test_smfmac_i32_16x16x128_i8(global int4* out, int4 a, int8 b, int4 c, int idx, int d) +{ + *out = __builtin_amdgcn_smfmac_i32_16x16x128_i8(a, b, c, idx, d, 0); // expected-error{{argument to '__builtin_amdgcn_smfmac_i32_16x16x128_i8' must be a constant integer}} + *out = __builtin_amdgcn_smfmac_i32_16x16x128_i8(a, b, c, idx, 0, d); // expected-error{{argument to '__builtin_amdgcn_smfmac_i32_16x16x128_i8' must be a constant integer}} +} diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl index 090cd3348ee837..68460e07791fd4 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl @@ -38,6 +38,7 @@ void test(__global float4* out0, half8 a0, half8 b0, float4 c0, *out7 = __builtin_amdgcn_smfmac_f32_32x32x32_f16(a7, b7, c7, 0, 0, 0); // expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x32_f16' needs target feature gfx950-insts}} *out8 = __builtin_amdgcn_smfmac_f32_16x16x64_bf16(a8, b8, c8, 0, 0, 0); // expected-error{{'__builtin_amdgcn_smfmac_f32_16x16x64_bf16' needs target feature gfx950-insts}} *out9 = __builtin_amdgcn_smfmac_f32_32x32x32_bf16(a9, b9, c9, 0, 0, 0); // expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x32_bf16' needs target feature gfx950-insts}} + *out10 = __builtin_amdgcn_smfmac_i32_16x16x128_i8(a10, b10, c10, 0, 0, 0); // expected-error{{'__builtin_amdgcn_smfmac_i32_16x16x128_i8' needs targe