[clang] 987aa34 - Corrected clang amdgpu-features.cl test for 6d614a82a4230ea69e322f56dc18dcbd815ed37b (AMDGPU MFMA built-ins)
Author: Konstantin Pyzhov Date: 2020-01-28T05:41:42-05:00 New Revision: 987aa3435f4517d663f776e261683b1620eb8101 URL: https://github.com/llvm/llvm-project/commit/987aa3435f4517d663f776e261683b1620eb8101 DIFF: https://github.com/llvm/llvm-project/commit/987aa3435f4517d663f776e261683b1620eb8101.diff LOG: Corrected clang amdgpu-features.cl test for 6d614a82a4230ea69e322f56dc18dcbd815ed37b (AMDGPU MFMA built-ins) Differential Revision: https://reviews.llvm.org/D72723 Added: Modified: clang/test/CodeGenOpenCL/amdgpu-features.cl Removed: diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl index f3ed26494831..d8eb2d26b0e3 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-features.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl @@ -16,7 +16,7 @@ // GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime,-fp32-denormals" // GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime,-fp32-denormals" -// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime,-fp32-denormals" +// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,-fp32-denormals" // GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,-fp32-denormals" // GFX1011: 
"target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,-fp32-denormals" // GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,-fp32-denormals" ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] ac9b2a6 - Add missing clang tests for 6d614a82a4230ea69e322f56dc18dcbd815ed37b (AMDGPU MFMA built-ins)
Author: Konstantin Pyzhov Date: 2020-01-28T04:41:21-05:00 New Revision: ac9b2a6297420a461f7b9db9e2dbd67f5f07f301 URL: https://github.com/llvm/llvm-project/commit/ac9b2a6297420a461f7b9db9e2dbd67f5f07f301 DIFF: https://github.com/llvm/llvm-project/commit/ac9b2a6297420a461f7b9db9e2dbd67f5f07f301.diff LOG: Add missing clang tests for 6d614a82a4230ea69e322f56dc18dcbd815ed37b (AMDGPU MFMA built-ins) Differential Revision: https://reviews.llvm.org/D72723 Added: Modified: clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl clang/test/SemaOpenCL/builtins-amdgcn-error-gfx908-param.cl Removed: diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl index e69de29bb2d1..c7db942b871a 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl @@ -0,0 +1,161 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx908 -S -emit-llvm -o - %s | FileCheck %s + +#pragma OPENCL EXTENSION cl_khr_fp64:enable + +typedef float v4f __attribute__((ext_vector_type(4))); +typedef float v16f __attribute__((ext_vector_type(16))); +typedef float v32f __attribute__((ext_vector_type(32))); +typedef half v4h __attribute__((ext_vector_type(4))); +typedef half v16h __attribute__((ext_vector_type(16))); +typedef half v32h __attribute__((ext_vector_type(32))); +typedef int v4i __attribute__((ext_vector_type(4))); +typedef int v16i __attribute__((ext_vector_type(16))); +typedef int v32i __attribute__((ext_vector_type(32))); +typedef short v2s __attribute__((ext_vector_type(2))); +typedef short v4s __attribute__((ext_vector_type(4))); +typedef short v16s __attribute__((ext_vector_type(16))); +typedef short v32s __attribute__((ext_vector_type(32))); +typedef double v4d __attribute__((ext_vector_type(4))); + + +// CHECK-LABEL: @test_mfma_f32_32x32x1f32 +// CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float %a, float %b, <32 x float> %c, 
i32 0, i32 0, i32 0) +void test_mfma_f32_32x32x1f32(global v32f* out, float a, float b, v32f c) +{ + *out = __builtin_amdgcn_mfma_f32_32x32x1f32(a, b, c, 0, 0, 0); +} + +// CHECK-LABEL: @test_mfma_f32_16x16x1f32 +// CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float %a, float %b, <16 x float> %c, i32 0, i32 0, i32 0) +void test_mfma_f32_16x16x1f32(global v16f* out, float a, float b, v16f c) +{ + *out = __builtin_amdgcn_mfma_f32_16x16x1f32(a, b, c, 0, 0, 0); +} + +// CHECK-LABEL: @test_mfma_f32_4x4x1f32 +// CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float %a, float %b, <4 x float> %c, i32 0, i32 0, i32 0) +void test_mfma_f32_4x4x1f32(global v4f* out, float a, float b, v4f c) +{ + *out = __builtin_amdgcn_mfma_f32_4x4x1f32(a, b, c, 0, 0, 0); +} + +// CHECK-LABEL: @test_mfma_f32_32x32x2f32 +// CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x2f32(float %a, float %b, <16 x float> %c, i32 0, i32 0, i32 0) +void test_mfma_f32_32x32x2f32(global v16f* out, float a, float b, v16f c) +{ + *out = __builtin_amdgcn_mfma_f32_32x32x2f32(a, b, c, 0, 0, 0); +} + +// CHECK-LABEL: @test_mfma_f32_16x16x4f32 +// CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x4f32(float %a, float %b, <4 x float> %c, i32 0, i32 0, i32 0) +void test_mfma_f32_16x16x4f32(global v4f* out, float a, float b, v4f c) +{ + *out = __builtin_amdgcn_mfma_f32_16x16x4f32(a, b, c, 0, 0, 0); +} + +// CHECK-LABEL: @test_mfma_f32_32x32x4f16 +// CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half> %a, <4 x half> %b, <32 x float> %c, i32 0, i32 0, i32 0) +void test_mfma_f32_32x32x4f16(global v32f* out, v4h a, v4h b, v32f c) +{ + *out = __builtin_amdgcn_mfma_f32_32x32x4f16(a, b, c, 0, 0, 0); +} + +// CHECK-LABEL: @test_mfma_f32_16x16x4f16 +// CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x4f16(<4 x half> %a, <4 x half> %b, <16 x float> %c, i32 0, i32 0, i32 0) +void test_mfma_f32_16x16x4f16(global v16f* out, v4h a, v4h b, v16f c) +{ + *out = 
__builtin_amdgcn_mfma_f32_16x16x4f16(a, b, c, 0, 0, 0); +} + +// CHECK-LABEL: @test_mfma_f32_4x4x4f16 +// CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x4f16(<4 x half> %a, <4 x half> %b, <4 x float> %c, i32 0, i32 0, i32 0) +void test_mfma_f32_4x4x4f16(global v4f* out, v4h a, v4h b, v4f c) +{ + *out = __builtin_amdgcn_mfma_f32_4x4x4f16(a, b, c, 0, 0, 0); +} + +// CHECK-LABEL: @test_mfma_f32_32x32x8f16 +// CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x8f16(<4 x half> %a, <4 x half> %b, <16 x float> %c, i32 0, i32 0, i32 0) +void test_mfma_f32_32x32x8f16(global v16f* out, v4h a, v4h b, v16f c) +{ + *out = __builtin_amdgcn_mfma_f32_32x32x8f16(a, b, c, 0, 0, 0); +} + +// CHECK-LABEL: @test_mfma_f32_16x16x16f16 +// CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x16f16(<4 x half> %a, <4 x half> %b, <4 x float> %c, i32 0, i32 0,
[clang] 6d614a8 - Summary:
Author: Konstantin Pyzhov Date: 2020-01-28T03:51:27-05:00 New Revision: 6d614a82a4230ea69e322f56dc18dcbd815ed37b URL: https://github.com/llvm/llvm-project/commit/6d614a82a4230ea69e322f56dc18dcbd815ed37b DIFF: https://github.com/llvm/llvm-project/commit/6d614a82a4230ea69e322f56dc18dcbd815ed37b.diff LOG: Summary: This CL adds clang declarations of built-in functions for AMDGPU MFMA intrinsics and instructions. OpenCL tests for new built-ins are included. Differential Revision: https://reviews.llvm.org/D72723 Added: clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl clang/test/SemaOpenCL/builtins-amdgcn-error-gfx908-param.cl Modified: clang/include/clang/Basic/BuiltinsAMDGPU.def clang/lib/Basic/Targets/AMDGPU.cpp llvm/include/llvm/IR/IntrinsicsAMDGPU.td Removed: diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 9b3a0f96798f..a9143ad8292c 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -212,5 +212,30 @@ BUILTIN(__builtin_r600_read_tidig_z, "Ui", "nc") BUILTIN(__builtin_r600_recipsqrt_ieee, "dd", "nc") BUILTIN(__builtin_r600_recipsqrt_ieeef, "ff", "nc") +//===--===// +// MFMA builtins. 
+//===--===// + +TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x1f32, "V32fffV32fIiIiIi", "nc", "mai-insts") +TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x1f32, "V16fffV16fIiIiIi", "nc", "mai-insts") +TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_4x4x1f32, "V4fffV4fIiIiIi", "nc", "mai-insts") +TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x2f32, "V16fffV16fIiIiIi", "nc", "mai-insts") +TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x4f32, "V4fffV4fIiIiIi", "nc", "mai-insts") +TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x4f16, "V32fV4hV4hV32fIiIiIi", "nc", "mai-insts") +TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x4f16, "V16fV4hV4hV16fIiIiIi", "nc", "mai-insts") +TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_4x4x4f16, "V4fV4hV4hV4fIiIiIi", "nc", "mai-insts") +TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x8f16, "V16fV4hV4hV16fIiIiIi", "nc", "mai-insts") +TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x16f16, "V4fV4hV4hV4fIiIiIi", "nc", "mai-insts") +TARGET_BUILTIN(__builtin_amdgcn_mfma_i32_32x32x4i8, "V32iiiV32iIiIiIi", "nc", "mai-insts") +TARGET_BUILTIN(__builtin_amdgcn_mfma_i32_16x16x4i8, "V16iiiV16iIiIiIi", "nc", "mai-insts") +TARGET_BUILTIN(__builtin_amdgcn_mfma_i32_4x4x4i8, "V4iiiV4iIiIiIi", "nc", "mai-insts") +TARGET_BUILTIN(__builtin_amdgcn_mfma_i32_32x32x8i8, "V16iiiV16iIiIiIi", "nc", "mai-insts") +TARGET_BUILTIN(__builtin_amdgcn_mfma_i32_16x16x16i8, "V4iiiV4iIiIiIi", "nc", "mai-insts") +TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x2bf16, "V32fV2sV2sV32fIiIiIi", "nc", "mai-insts") +TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x2bf16, "V16fV2sV2sV16fIiIiIi", "nc", "mai-insts") +TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_4x4x2bf16, "V4fV2sV2sV4fIiIiIi", "nc", "mai-insts") +TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x4bf16, "V16fV2sV2sV16fIiIiIi", "nc", "mai-insts") +TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x8bf16, "V4fV2sV2sV4fIiIiIi", "nc", "mai-insts") + #undef BUILTIN #undef TARGET_BUILTIN diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp 
b/clang/lib/Basic/Targets/AMDGPU.cpp index 249a123ea605..0aaf6813442a 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -163,6 +163,7 @@ bool AMDGPUTargetInfo::initFeatureMap( Features["dot4-insts"] = true; Features["dot5-insts"] = true; Features["dot6-insts"] = true; + Features["mai-insts"] = true; LLVM_FALLTHROUGH; case GK_GFX906: Features["dl-insts"] = true; diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl new file mode 100644 index ..e69de29bb2d1 diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx908-param.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx908-param.cl new file mode 100644 index ..e69de29bb2d1 diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 71cea8c1f3d5..68e8a830ecac 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -1725,105 +1725,125 @@ def int_amdgcn_buffer_atomic_fadd: AMDGPUBufferAtomicNoRtn; def int_amdgcn_global_atomic_fadd: AMDGPUGlobalAtomicNoRtn; // llvm.amdgcn.mfma.f32.* vdst, srcA, srcB, srcC, cbsz, abid, blgp -def int_amdgcn_mfma_f32_32x32x1f32 : Intrinsic<[llvm_v32f32_ty], - [llvm_float_ty, llvm_float_ty, llvm_v32f32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; - -def
r364183 - [CUDA][HIP] Don't set comdat attribute for CUDA device stub functions.\nDifferential Revision: https://reviews.llvm.org/D63277
Author: kpyzhov Date: Mon Jun 24 07:40:20 2019 New Revision: 364183 URL: http://llvm.org/viewvc/llvm-project?rev=364183&view=rev Log: [CUDA][HIP] Don't set comdat attribute for CUDA device stub functions.\nDifferential Revision: https://reviews.llvm.org/D63277 Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=364183&r1=364182&r2=364183&view=diff == --- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original) +++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Mon Jun 24 07:40:20 2019 @@ -3712,6 +3712,11 @@ static bool shouldBeInCOMDAT(CodeGenModu if (!CGM.supportsCOMDAT()) return false; + // Do not set COMDAT attribute for CUDA/HIP stub functions to prevent + // them being "merged" by the COMDAT Folding linker optimization. + if (D.hasAttr<CUDAGlobalAttr>()) +return false; + if (D.hasAttr<SelectAnyAttr>()) return true; ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits