[llvm-branch-commits] [clang] [llvm] AMDGPU: Add v_smfmac_f32_32x32x64_fp8_fp8 for gfx950 (PR #117259)
arsenm wrote: ### Merge activity * **Nov 22, 2:45 PM EST**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/117259). https://github.com/llvm/llvm-project/pull/117259 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] AMDGPU: Add v_smfmac_f32_32x32x64_fp8_fp8 for gfx950 (PR #117259)
https://github.com/shiltian edited https://github.com/llvm/llvm-project/pull/117259 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] AMDGPU: Add v_smfmac_f32_32x32x64_fp8_fp8 for gfx950 (PR #117259)
https://github.com/shiltian approved this pull request. https://github.com/llvm/llvm-project/pull/117259 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] AMDGPU: Add v_smfmac_f32_32x32x64_fp8_fp8 for gfx950 (PR #117259)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/117259 >From d5b3bb6210d19c81a935790c5267c3d97125a00d Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 3 Feb 2024 21:43:00 +0530 Subject: [PATCH] AMDGPU: Add v_smfmac_f32_32x32x64_fp8_fp8 for gfx950 --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 1 + .../CodeGenOpenCL/builtins-amdgcn-mfma.cl | 7 + .../builtins-amdgcn-error-gfx950-param.cl | 6 + .../builtins-amdgcn-error-gfx950.cl | 1 + llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 1 + .../AMDGPU/AMDGPUInstructionSelector.cpp | 4 + .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 3 +- llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 2 + .../UniformityAnalysis/AMDGPU/intrinsics.ll | 9 + .../AMDGPU/llvm.amdgcn.smfmac.gfx950.ll | 414 ++ llvm/test/MC/AMDGPU/mai-gfx950.s | 36 ++ .../MC/Disassembler/AMDGPU/gfx950_mai.txt | 22 + 12 files changed, 505 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index f90af7000e3196..51a5b1dbad495c 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -457,6 +457,7 @@ TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x128_fp8_fp8, "V4fV4iV8iV4fiIiIi TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_bf8_bf8, "V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts") TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8, "V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts") TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8, "V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts") +TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8, "V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts") //===--===// // GFX12+ only builtins. diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl index 33b60d53f11cc8..00346baa6ff84d 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl @@ -559,4 +559,11 @@ void test_smfmac_f32_32x32x64_fp8_bf8(global v16f* out, v4i a, v8i b, v16f c, in *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a, b, c, idx, 0, 0); } +// CHECK-GFX950-LABEL: @test_smfmac_f32_32x32x64_fp8_fp8 +// CHECK-GFX950: call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.fp8.fp8(<4 x i32> %a, <8 x i32> %b, <16 x float> %c, i32 %idx, i32 0, i32 0) +void test_smfmac_f32_32x32x64_fp8_fp8(global v16f* out, v4i a, v8i b, v16f c, int idx) +{ + *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8(a, b, c, idx, 0, 0); +} + #endif diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl index c53ca8a7c3513f..b3b359a1e0c65b 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl @@ -142,3 +142,9 @@ void test_smfmac_f32_32x32x64_fp8_bf8(global float16* out, int4 a, int8 b, float *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a, b, c, idx, d, 0); // expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8' must be a constant integer}} *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a, b, c, idx, 0, d); // expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8' must be a constant integer}} } + +void test_smfmac_f32_32x32x64_fp8_fp8(global float16* out, int4 a, int8 b, float16 c, int idx, int d) +{ + *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8(a, b, c, idx, d, 0); // expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8' must be a constant integer}} + *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8(a, b, c, idx, 0, d); // expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8' must be a constant integer}} +} diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl index 9e563a7b0bd64c..57523cf0af1b18 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl @@ -47,6 +47,7 @@ void test(__global float4* out0, half8 a0, half8 b0, float4 c0, *out13 = __builtin_amdgcn_smfmac_f32_32x32x64_bf8_bf8(a13, b13, c13, 0, 0, 0); // expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x64_bf8_bf8' needs target feature gfx950-insts}} *out13 = __builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8(a13, b13, c13, 0, 0, 0); // expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8' needs target feature gfx950-insts}} *out13 = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a13, b13, c13, 0, 0, 0); // expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8' needs target feature gfx950-insts}} + *out13 = __builtin_amdgcn_smfmac_f32_32
[llvm-branch-commits] [clang] [llvm] AMDGPU: Add v_smfmac_f32_32x32x64_fp8_fp8 for gfx950 (PR #117259)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/117259 >From d36a1301eb84377617c35c125e136230327eb3e9 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 3 Feb 2024 21:43:00 +0530 Subject: [PATCH] AMDGPU: Add v_smfmac_f32_32x32x64_fp8_fp8 for gfx950 --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 1 + .../CodeGenOpenCL/builtins-amdgcn-mfma.cl | 7 + .../builtins-amdgcn-error-gfx950-param.cl | 6 + .../builtins-amdgcn-error-gfx950.cl | 1 + llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 1 + .../AMDGPU/AMDGPUInstructionSelector.cpp | 4 + .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 3 +- llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 2 + .../UniformityAnalysis/AMDGPU/intrinsics.ll | 9 + .../AMDGPU/llvm.amdgcn.smfmac.gfx950.ll | 414 ++ llvm/test/MC/AMDGPU/mai-gfx950.s | 36 ++ .../MC/Disassembler/AMDGPU/gfx950_mai.txt | 22 + 12 files changed, 505 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index f90af7000e3196..51a5b1dbad495c 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -457,6 +457,7 @@ TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x128_fp8_fp8, "V4fV4iV8iV4fiIiIi TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_bf8_bf8, "V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts") TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8, "V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts") TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8, "V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts") +TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8, "V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts") //===--===// // GFX12+ only builtins. diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl index 33b60d53f11cc8..00346baa6ff84d 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl @@ -559,4 +559,11 @@ void test_smfmac_f32_32x32x64_fp8_bf8(global v16f* out, v4i a, v8i b, v16f c, in *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a, b, c, idx, 0, 0); } +// CHECK-GFX950-LABEL: @test_smfmac_f32_32x32x64_fp8_fp8 +// CHECK-GFX950: call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.fp8.fp8(<4 x i32> %a, <8 x i32> %b, <16 x float> %c, i32 %idx, i32 0, i32 0) +void test_smfmac_f32_32x32x64_fp8_fp8(global v16f* out, v4i a, v8i b, v16f c, int idx) +{ + *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8(a, b, c, idx, 0, 0); +} + #endif diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl index c53ca8a7c3513f..b3b359a1e0c65b 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl @@ -142,3 +142,9 @@ void test_smfmac_f32_32x32x64_fp8_bf8(global float16* out, int4 a, int8 b, float *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a, b, c, idx, d, 0); // expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8' must be a constant integer}} *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a, b, c, idx, 0, d); // expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8' must be a constant integer}} } + +void test_smfmac_f32_32x32x64_fp8_fp8(global float16* out, int4 a, int8 b, float16 c, int idx, int d) +{ + *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8(a, b, c, idx, d, 0); // expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8' must be a constant integer}} + *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8(a, b, c, idx, 0, d); // expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8' must be a constant integer}} +} diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl index 9e563a7b0bd64c..57523cf0af1b18 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl @@ -47,6 +47,7 @@ void test(__global float4* out0, half8 a0, half8 b0, float4 c0, *out13 = __builtin_amdgcn_smfmac_f32_32x32x64_bf8_bf8(a13, b13, c13, 0, 0, 0); // expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x64_bf8_bf8' needs target feature gfx950-insts}} *out13 = __builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8(a13, b13, c13, 0, 0, 0); // expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8' needs target feature gfx950-insts}} *out13 = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a13, b13, c13, 0, 0, 0); // expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8' needs target feature gfx950-insts}} + *out13 = __builtin_amdgcn_smfmac_f32_32
[llvm-branch-commits] [clang] [llvm] AMDGPU: Add v_smfmac_f32_32x32x64_fp8_fp8 for gfx950 (PR #117259)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes --- Patch is 33.74 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/117259.diff 12 Files Affected: - (modified) clang/include/clang/Basic/BuiltinsAMDGPU.def (+1) - (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl (+7) - (modified) clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl (+6) - (modified) clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl (+1) - (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+1) - (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (+4) - (modified) llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (+2-1) - (modified) llvm/lib/Target/AMDGPU/VOP3PInstructions.td (+2) - (modified) llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll (+9) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll (+414) - (modified) llvm/test/MC/AMDGPU/mai-gfx950.s (+36) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx950_mai.txt (+22) ``diff diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index f90af7000e3196..51a5b1dbad495c 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -457,6 +457,7 @@ TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x128_fp8_fp8, "V4fV4iV8iV4fiIiIi TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_bf8_bf8, "V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts") TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8, "V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts") TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8, "V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts") +TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8, "V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts") //===--===// // GFX12+ only builtins. diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl index 33b60d53f11cc8..00346baa6ff84d 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl @@ -559,4 +559,11 @@ void test_smfmac_f32_32x32x64_fp8_bf8(global v16f* out, v4i a, v8i b, v16f c, in *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a, b, c, idx, 0, 0); } +// CHECK-GFX950-LABEL: @test_smfmac_f32_32x32x64_fp8_fp8 +// CHECK-GFX950: call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.fp8.fp8(<4 x i32> %a, <8 x i32> %b, <16 x float> %c, i32 %idx, i32 0, i32 0) +void test_smfmac_f32_32x32x64_fp8_fp8(global v16f* out, v4i a, v8i b, v16f c, int idx) +{ + *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8(a, b, c, idx, 0, 0); +} + #endif diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl index c53ca8a7c3513f..b3b359a1e0c65b 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl @@ -142,3 +142,9 @@ void test_smfmac_f32_32x32x64_fp8_bf8(global float16* out, int4 a, int8 b, float *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a, b, c, idx, d, 0); // expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8' must be a constant integer}} *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a, b, c, idx, 0, d); // expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8' must be a constant integer}} } + +void test_smfmac_f32_32x32x64_fp8_fp8(global float16* out, int4 a, int8 b, float16 c, int idx, int d) +{ + *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8(a, b, c, idx, d, 0); // expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8' must be a constant integer}} + *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8(a, b, c, idx, 0, d); // expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8' must be a constant integer}} +} diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl index 9e563a7b0bd64c..57523cf0af1b18 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl @@ -47,6 +47,7 @@ void test(__global float4* out0, half8 a0, half8 b0, float4 c0, *out13 = __builtin_amdgcn_smfmac_f32_32x32x64_bf8_bf8(a13, b13, c13, 0, 0, 0); // expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x64_bf8_bf8' needs target feature gfx950-insts}} *out13 = __builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8(a13, b13, c13, 0, 0, 0); // expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8' needs target feature gfx950-insts}} *out13 = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a13, b13, c13, 0, 0, 0); // expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8' needs target
[llvm-branch-commits] [clang] [llvm] AMDGPU: Add v_smfmac_f32_32x32x64_fp8_fp8 for gfx950 (PR #117259)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/117259 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] AMDGPU: Add v_smfmac_f32_32x32x64_fp8_fp8 for gfx950 (PR #117259)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/117259?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#117261** https://app.graphite.dev/github/pr/llvm/llvm-project/117261?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#117260** https://app.graphite.dev/github/pr/llvm/llvm-project/117260?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#117259** https://app.graphite.dev/github/pr/llvm/llvm-project/117259?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 https://app.graphite.dev/github/pr/llvm/llvm-project/117259?utm_source=stack-comment-view-in-graphite"; target="_blank">(View in Graphite) * **#117258** https://app.graphite.dev/github/pr/llvm/llvm-project/117258?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#117257** https://app.graphite.dev/github/pr/llvm/llvm-project/117257?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#117256** https://app.graphite.dev/github/pr/llvm/llvm-project/117256?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#117235** https://app.graphite.dev/github/pr/llvm/llvm-project/117235?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#117234** https://app.graphite.dev/github/pr/llvm/llvm-project/117234?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#117233** https://app.graphite.dev/github/pr/llvm/llvm-project/117233?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#117232** https://app.graphite.dev/github/pr/llvm/llvm-project/117232?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#117214** https://app.graphite.dev/github/pr/llvm/llvm-project/117214?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#117213** https://app.graphite.dev/github/pr/llvm/llvm-project/117213?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#117212** https://app.graphite.dev/github/pr/llvm/llvm-project/117212?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#117211** https://app.graphite.dev/github/pr/llvm/llvm-project/117211?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#117205** https://app.graphite.dev/github/pr/llvm/llvm-project/117205?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#117202** https://app.graphite.dev/github/pr/llvm/llvm-project/117202?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#117055** https://app.graphite.dev/github/pr/llvm/llvm-project/117055?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#117053** https://app.graphite.dev/github/pr/llvm/llvm-project/117053?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#117052** https://app.graphite.dev/github/pr/llvm/llvm-project/117052?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#116728** https://app.graphite.dev/github/pr/llvm/llvm-project/116728?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#116724** https
[llvm-branch-commits] [clang] [llvm] AMDGPU: Add v_smfmac_f32_32x32x64_fp8_fp8 for gfx950 (PR #117259)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/117259 None >From 253fcf59235b1bc4edd0b8ce811f900606aa2a67 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 3 Feb 2024 21:43:00 +0530 Subject: [PATCH] AMDGPU: Add v_smfmac_f32_32x32x64_fp8_fp8 for gfx950 --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 1 + .../CodeGenOpenCL/builtins-amdgcn-mfma.cl | 7 + .../builtins-amdgcn-error-gfx950-param.cl | 6 + .../builtins-amdgcn-error-gfx950.cl | 1 + llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 1 + .../AMDGPU/AMDGPUInstructionSelector.cpp | 4 + .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 3 +- llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 2 + .../UniformityAnalysis/AMDGPU/intrinsics.ll | 9 + .../AMDGPU/llvm.amdgcn.smfmac.gfx950.ll | 414 ++ llvm/test/MC/AMDGPU/mai-gfx950.s | 36 ++ .../MC/Disassembler/AMDGPU/gfx950_mai.txt | 22 + 12 files changed, 505 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index f90af7000e3196..51a5b1dbad495c 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -457,6 +457,7 @@ TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x128_fp8_fp8, "V4fV4iV8iV4fiIiIi TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_bf8_bf8, "V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts") TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8, "V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts") TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8, "V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts") +TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8, "V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts") //===--===// // GFX12+ only builtins. diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl index 33b60d53f11cc8..00346baa6ff84d 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl @@ -559,4 +559,11 @@ void test_smfmac_f32_32x32x64_fp8_bf8(global v16f* out, v4i a, v8i b, v16f c, in *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a, b, c, idx, 0, 0); } +// CHECK-GFX950-LABEL: @test_smfmac_f32_32x32x64_fp8_fp8 +// CHECK-GFX950: call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.fp8.fp8(<4 x i32> %a, <8 x i32> %b, <16 x float> %c, i32 %idx, i32 0, i32 0) +void test_smfmac_f32_32x32x64_fp8_fp8(global v16f* out, v4i a, v8i b, v16f c, int idx) +{ + *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8(a, b, c, idx, 0, 0); +} + #endif diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl index c53ca8a7c3513f..b3b359a1e0c65b 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl @@ -142,3 +142,9 @@ void test_smfmac_f32_32x32x64_fp8_bf8(global float16* out, int4 a, int8 b, float *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a, b, c, idx, d, 0); // expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8' must be a constant integer}} *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a, b, c, idx, 0, d); // expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8' must be a constant integer}} } + +void test_smfmac_f32_32x32x64_fp8_fp8(global float16* out, int4 a, int8 b, float16 c, int idx, int d) +{ + *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8(a, b, c, idx, d, 0); // expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8' must be a constant integer}} + *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8(a, b, c, idx, 0, d); // expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8' must be a constant integer}} +} diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl index 9e563a7b0bd64c..57523cf0af1b18 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl @@ -47,6 +47,7 @@ void test(__global float4* out0, half8 a0, half8 b0, float4 c0, *out13 = __builtin_amdgcn_smfmac_f32_32x32x64_bf8_bf8(a13, b13, c13, 0, 0, 0); // expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x64_bf8_bf8' needs target feature gfx950-insts}} *out13 = __builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8(a13, b13, c13, 0, 0, 0); // expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8' needs target feature gfx950-insts}} *out13 = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a13, b13, c13, 0, 0, 0); // expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8' needs target feature gfx950-insts}} + *out13 = __builtin_amdgcn_smfmac_